Skip to content

Commit

Permalink
Skip removing nodes in <code> (#647)
Browse files: browse the repository at this point in the history
* Skip removing nodes in <code> (especially common for comments in code blocks)

* don't limit maxDepth
  • Loading branch information
jakubriedl committed Nov 23, 2020
1 parent 3c83389 commit 290724c
Show file tree
Hide file tree
Showing 4 changed files with 373 additions and 0 deletions.
5 changes: 5 additions & 0 deletions Readability.js
Expand Up @@ -912,6 +912,7 @@ Readability.prototype = {
if (this.REGEXPS.unlikelyCandidates.test(matchString) &&
!this.REGEXPS.okMaybeItsACandidate.test(matchString) &&
!this._hasAncestorTag(node, "table") &&
!this._hasAncestorTag(node, "code") &&
node.tagName !== "BODY" &&
node.tagName !== "A") {
this.log("Removing unlikely candidate - " + matchString);
Expand Down Expand Up @@ -2038,6 +2039,10 @@ Readability.prototype = {
return false;
}

if (this._hasAncestorTag(node, "code")) {
return false;
}

var weight = this._getClassWeight(node);

this.log("Cleaning Conditionally", node);
Expand Down
8 changes: 8 additions & 0 deletions test/test-pages/v8-blog/expected-metadata.json
@@ -0,0 +1,8 @@
{
"title": "standalone WebAssembly binaries using Emscripten · V8",
"byline": null,
"dir": null,
"excerpt": "Emscripten now supports standalone Wasm files, which do not need JavaScript.",
"siteName": null,
"readerable": true
}

0 comments on commit 290724c

Please sign in to comment.