feat: Unescape HTML codes when getting node contents (#6198)

It appears that the previous `textContent` API decoded HTML entities. This PR unescapes them in `getTextContents` to provide feature parity with the DOMParser approach.
shaka-project · Jan 31, 2024 · a1c1620 · a1c1620
1 parent 78c12a6
commit a1c1620
Show file tree

Hide file tree

Showing 2 changed files with 31 additions and 2 deletions.
diff --git a/lib/util/tXml.js b/lib/util/tXml.js
@@ -386,8 +386,9 @@ shaka.util.TXml = class {
    * @return {?string} The text contents, or null if there are none.
    */
   static getTextContents(node) {
+    const StringUtils = shaka.util.StringUtils;
     if (typeof node === 'string') {
-      return node;
+      return StringUtils.htmlUnescape(node);
     }
     const textContent = node.children.reduce(
         (acc, curr) => (typeof curr === 'string' ? acc + curr : acc),
@@ -396,7 +397,7 @@ shaka.util.TXml = class {
     if (textContent === '') {
       return null;
     }
-    return textContent;
+    return StringUtils.htmlUnescape(textContent);
   }
 
   /**

diff --git a/test/util/tXml_unit.js b/test/util/tXml_unit.js
@@ -110,6 +110,34 @@ describe('tXml', () => {
 
       expect(TXml.getContents(root)).toBe('<Foo> Bar');
     });
+
+    it('unescapes html codes', () => {
+      const xmlString = [
+        '<?xml version="1.0"?>',
+        '<Root>',
+        '  &amp;&gt;&lt;',
+        '</Root>',
+      ].join('\n');
+      const root = TXml.parseXmlString(xmlString, 'Root');
+      goog.asserts.assert(root, 'parseFromString should succeed');
+
+      expect(TXml.getContents(root)).toBe('&><');
+    });
+  });
+
+  describe('getTextContents', () => {
+    it('unescapes html codes', () => {
+      const xmlString = [
+        '<?xml version="1.0"?>',
+        '<Root>',
+        '  &amp;&gt;&lt;',
+        '</Root>',
+      ].join('\n');
+      const root = TXml.parseXmlString(xmlString, 'Root');
+      goog.asserts.assert(root, 'parseFromString should succeed');
+
+      expect(TXml.getTextContents(root)).toBe('\n  &><\n');
+    });
   });
 
   describe('parseAttr', () => {