Skip to content

Commit

Permalink
Add optimization that prunes nested <g>-tags
Browse files Browse the repository at this point in the history
An optimization that prunes nested <g>-tags when they contain exactly
one <g> and nothing else (except whitespace nodes).  This looks a bit
like `removeNestedGroups` except it only touches <g> tags without
attributes (but can remove <g>-tags completely from a tree, whereas
this optimization always leaves at least one <g> tag behind).

Closes: scour-project#215
Signed-off-by: Niels Thykier <niels@thykier.net>
  • Loading branch information
nthykier committed May 17, 2020
1 parent 7e917c9 commit 942fc04
Show file tree
Hide file tree
Showing 5 changed files with 217 additions and 0 deletions.
73 changes: 73 additions & 0 deletions scour/scour.py
Original file line number Diff line number Diff line change
Expand Up @@ -1143,6 +1143,78 @@ def moveCommonAttributesToParentGroup(elem, referencedElements):
return num


def mergeSingleChildGroupIntoParent(elem):
"""
Merge <g X></g Y>...</g></g> into <g X Y>...</g>
Optimize the pattern:
<g a="1" c="4"><g a="2" b="3">...</g>/</g>
into
<g a="2" b="3" c="4">...</g>
This is only possible when:
* The parent has exactly one <g>-child node (ignoring whitespace)
* The child node is mergeable (per :func:`g_tag_is_unmergeable`)
Note that this function overlaps to some extend with `removeNestedGroups`.
However, `removeNestedGroups` work entirely on empty <g> and can completely
remove empty `<g>` tags. This works on any <g> tag containing <g> tags
(regardless of their attributes) and as such this function can never
completely eliminate all <g>-tags (but it does deal with attributes).
This function acts recursively on the given element.
"""
num = 0

# Depth first - fix child notes first
for childNode in elem.childNodes:
if childNode.nodeType == Node.ELEMENT_NODE:
num += mergeSingleChildGroupIntoParent(childNode)

# The elem node must be a <g> tag for this to be relevant.
if elem.nodeType != Node.ELEMENT_NODE or elem.nodeName != 'g' or \
elem.namespaceURI != NS['SVG']:
return num

if g_tag_is_unmergeable(elem):
# elem itself is protected, then leave it alone.
return num

g_node_idx = None
for idx, node in enumerate(elem.childNodes):
if node.nodeType == Node.TEXT_NODE and not node.nodeValue.strip():
# whitespace-only node; ignore
continue
if node.nodeType != Node.ELEMENT_NODE or node.nodeName != 'g' or \
node.namespaceURI != NS['SVG']:
# The elem node has something other than <g> tag; then this
# optimization does not apply
return num
if g_tag_is_unmergeable(node) or g_node_idx is not None:
# The elem node has two or more <g> tags or the <g> node it has
# is unmergeable; then this optimization does not apply
return num
g_node_idx = idx

# We got a optimization candidate
g_node = elem.childNodes[g_node_idx]
elem.childNodes.remove(g_node)

elem.childNodes[g_node_idx:g_node_idx] = g_node.childNodes
g_node.childNodes = []

for a in g_node.attributes.values():
elem.setAttribute(a.nodeName, a.nodeValue)

num += 1

return num


def mergeSiblingGroupsWithCommonAttributes(elem):
"""
Merge two or more sibling <g> elements with the identical attributes.
Expand Down Expand Up @@ -3754,6 +3826,7 @@ def xmlnsUnused(prefix, namespace):
# Collapse groups LAST, because we've created groups. If done before
# moveAttributesToParentGroup, empty <g>'s may remain.
if options.group_collapse:
_num_elements_removed += mergeSingleChildGroupIntoParent(doc.documentElement)
while removeNestedGroups(doc.documentElement) > 0:
pass

Expand Down
99 changes: 99 additions & 0 deletions test_scour.py
Original file line number Diff line number Diff line change
Expand Up @@ -2075,6 +2075,105 @@ def runTest(self):
'Erroneously removed a <g> in a <switch>')


class GroupParentMerge(unittest.TestCase):

def test_parent_merge(self):
doc = scourXmlFile('unittests/group-parent-merge.svg',
parse_args([]))
g_tags = doc.getElementsByTagName('g')
attrs = {
'font-family': 'Liberation Sans,Arial,Helvetica,sans-serif',
'text-anchor': 'middle',
'font-weight': '400',
'font-size': '24',
}
self.assertEqual(g_tags.length, 1,
'Inline single-child node <g> tags into parent <g>-tags')

g_tag = g_tags[0]
for attr_name, attr_value in attrs.items():

self.assertEqual(g_tag.getAttribute(attr_name), attr_value,
'Parent now has inherited attributes of obsolete <g>-tags')

def test_parent_merge_disabled(self):
doc = scourXmlFile('unittests/group-parent-merge.svg',
parse_args(['--disable-group-collapsing']))
g_tags = doc.getElementsByTagName('g')
attrs = {
'font-family': 'Liberation Sans,Arial,Helvetica,sans-serif',
'text-anchor': '',
'font-weight': '',
'font-size': '',
}
self.assertEqual(g_tags.length, 4,
'Inline single-child node <g> tags into parent <g>-tags')

# There should be exactly one <g> tag in the top of the document
# Note that the order returned by getElementsByTagName is not specified
# so we do not rely on its return value
g_tags = [g for g in doc.documentElement.childNodes if g.nodeName == 'g']
self.assertEqual(len(g_tags), 1,
'Optimization must not move the <g> up to the root')
g_tag = g_tags[0]
for attr_name, attr_value in attrs.items():
self.assertEqual(g_tag.getAttribute(attr_name), attr_value,
'Parent now has inherited attributes of obsolete <g>-tags')

def test_parent_merge2(self):
doc = scourXmlFile('unittests/group-parent-merge2.svg',
parse_args([]))
attrs = {
'font-family': 'Liberation Sans,Arial,Helvetica,sans-serif',
'text-anchor': 'middle',
'font-weight': '400',
'font-size': '', # The top-level g-node cannot have gotten this.
}
# There is one inner <g> that cannot be optimized, so there must be 2
# <g> tags in total
self.assertEqual(doc.getElementsByTagName('g').length, 2,
'Inline single-child node <g> tags into parent <g>-tags')

# There should be exactly one <g> tag in the top of the document
# Note that the order returned by getElementsByTagName is not specified
# so we do not rely on its return value
g_tags = [g for g in doc.documentElement.childNodes if g.nodeName == 'g']
self.assertEqual(len(g_tags), 1,
'Optimization must not move the <g> up to the root')
g_tag = g_tags[0]
for attr_name, attr_value in attrs.items():
self.assertEqual(g_tag.getAttribute(attr_name), attr_value,
'Parent now has inherited attributes of obsolete <g>-tags')

def test_parent_merge3(self):
doc = scourXmlFile('unittests/group-parent-merge3.svg',
parse_args(['--protect-ids-list=foo']))
attrs = {
'font-family': 'Liberation Sans,Arial,Helvetica,sans-serif',
'text-anchor': 'middle',
'font-weight': '400',
'font-size': '', # The top-level g-node cannot have gotten this.
}
# There is one inner <g> that cannot be optimized, so there must be 2
# <g> tags in total
self.assertEqual(doc.getElementsByTagName('g').length, 2,
'Inline single-child node <g> tags into parent <g>-tags')

self.assertIsNotNone(doc.getElementById('foo'), 'The inner <g> was left untouched')

# There should be exactly one <g> tag in the top of the document
# Note that the order returned by getElementsByTagName is not specified
# so we do not rely on its return value
g_tags = [g for g in doc.documentElement.childNodes if g.nodeName == 'g']
self.assertEqual(len(g_tags), 1,
'Optimization must not move the <g> up to the root')
g_tag = g_tags[0]
for attr_name, attr_value in attrs.items():
self.assertEqual(g_tag.getAttribute(attr_name), attr_value,
'Parent now has inherited attributes of obsolete <g>-tags')



class GroupSiblingMerge(unittest.TestCase):

def test_sibling_merge(self):
Expand Down
15 changes: 15 additions & 0 deletions unittests/group-parent-merge.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
15 changes: 15 additions & 0 deletions unittests/group-parent-merge2.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
15 changes: 15 additions & 0 deletions unittests/group-parent-merge3.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 942fc04

Please sign in to comment.