Skip to content

Commit

Permalink
Merge pull request #148 from nonara/fix-144
Browse files Browse the repository at this point in the history
Fix 144
  • Loading branch information
taoqf committed Sep 18, 2021
2 parents df01360 + 5a44c8f commit fd3ec55
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 11 deletions.
12 changes: 9 additions & 3 deletions src/nodes/html.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1025,6 +1025,7 @@ export function base_parse(data: string, options = { lowerCaseTagName: false, co
let currentParent = root;
const stack = [root];
let lastTextPos = -1;
let noNestedTagIndex: undefined | number = undefined;
let match: RegExpExecArray;
// https://github.com/taoqf/node-html-parser/issues/38
data = `<${frameflag}>${data}</${frameflag}>`;
Expand Down Expand Up @@ -1081,9 +1082,13 @@ export function base_parse(data: string, options = { lowerCaseTagName: false, co
}
}

if (currentParent.rawTagName === 'a' && match[2] === 'a') {
stack.pop();
currentParent = arr_back(stack);
// Prevent nested A tags by terminating the last A and starting a new one (see issue #144)
if (match[2] === 'a' || match[2] === 'A') {
if (noNestedTagIndex !== undefined) {
stack.splice(noNestedTagIndex);
currentParent = arr_back(stack);
}
noNestedTagIndex = stack.length;
}

const tagEndPos = kMarkupPattern.lastIndex;
Expand Down Expand Up @@ -1123,6 +1128,7 @@ export function base_parse(data: string, options = { lowerCaseTagName: false, co
// Handle closing tags or self-closed elements (e.g. </tag> or <br>)
if (match[1] || match[4] || kSelfClosingElements[match[2]]) {
while (true) {
if (match[2] === 'a' || match[2] === 'A') noNestedTagIndex = undefined;
if (currentParent.rawTagName === match[2]) {
// Update range end for closed tag
(<[number, number]>currentParent.range)[1] = createRange(-1, Math.max(lastTextPos, tagEndPos))[1];
Expand Down
32 changes: 24 additions & 8 deletions test/144.js
Original file line number Diff line number Diff line change
@@ -1,18 +1,34 @@
const { parse } = require('../dist');
const { parse, NodeType } = require('../dist');

// Also see comments on https://github.com/taoqf/node-html-parser/pull/148 for additional issues corrected
describe('issue 144', function () {
it('Nested A tags parsed improperly', function () {
const html = `<a href="#">link <a href="#">nested link</a> end</a>`;
const html = `<A href="#"><b>link <a href="#">nested link</a> end</b></A>`;

const root = parse(html);
root.innerHTML.should.eql(`<a href="#">link </a><a href="#">nested link</a> end`);

root.innerHTML.should.eql(`<A href="#"><b>link </b></A><a href="#">nested link</a> end`);
root.childNodes.length.should.eql(3);

const a1 = root.childNodes[0];
a1.tagName.should.eql('A');
a1.nodeType.should.eql(1);
a1.nodeType.should.eql(NodeType.ELEMENT_NODE);
a1.childNodes.length.should.eql(1);

const b = a1.childNodes[0];
b.tagName.should.eql('B');
b.childNodes.length.should.eql(1);
b.text.should.eql('link ');

const a2 = root.childNodes[1];
a2.nodeType.should.eql(1);
const t1 = root.childNodes[2];
t1.nodeType.should.eql(3);
t1.textContent.should.eql(' end');
a2.tagName.should.eql('A');
a2.nodeType.should.eql(NodeType.ELEMENT_NODE);
a2.childNodes.length.should.eql(1);
a2.childNodes[0].nodeType.should.eql(NodeType.TEXT_NODE);
a2.text.should.eql('nested link');

const endText = root.childNodes[2];
endText.nodeType.should.eql(NodeType.TEXT_NODE);
endText.textContent.should.eql(' end');
});
});

0 comments on commit fd3ec55

Please sign in to comment.