Skip to content

Commit

Permalink
Expose "lang" attribute of original document from Readability.parse (#…
Browse files: browse the repository at this point in the history
  • Loading branch information
micha154 committed Jan 26, 2022
1 parent 7d4c939 commit 1fde3ac
Show file tree
Hide file tree
Showing 5 changed files with 1,914 additions and 0 deletions.
6 changes: 6 additions & 0 deletions Readability.js
Expand Up @@ -896,6 +896,11 @@ Readability.prototype = {
let shouldRemoveTitleHeader = true;

while (node) {

if (node.tagName === "HTML") {
this._articleLang = node.getAttribute("lang");
}

var matchString = node.className + " " + node.id;

if (!this._isProbablyVisible(node)) {
Expand Down Expand Up @@ -2263,6 +2268,7 @@ Readability.prototype = {
title: this._articleTitle,
byline: metadata.byline || this._articleByline,
dir: this._articleDir,
lang: this._articleLang,
content: this._serializer(articleContent),
textContent: textContent,
length: textContent.length,
Expand Down
9 changes: 9 additions & 0 deletions test/test-pages/nytimes-5/expected-metadata.json
@@ -0,0 +1,9 @@
{
"title": "The New York Times en Español",
"byline": "Tariq Panja",
"dir": null,
"lang": "es",
"excerpt": "Entérate de lo que está pasando en el mundo y de las noticias económicas, de negocios, tecnología, arte, estilos de vida, deporte, ciencia y opiniones. No importa cuáles sean tus intereses: el Times lo cubre con inmejorable calidad, profundidad e independencia.",
"siteName": null,
"readerable": false
}

0 comments on commit 1fde3ac

Please sign in to comment.