From 9244a7f5f0b92f9b7974572b10653b207edc6c58 Mon Sep 17 00:00:00 2001
From: Karl Voit
Date: Sat, 3 Oct 2020 16:53:10 +0200
Subject: [PATCH] Reading time estimations, fixing #47
---
README.org | 2 +
lib/htmlizer.py | 81 ++++++++++++++++++-
templates/blog-format.org | 36 +++++++++
templates/public_voit.css | 19 +++--
templates/public_voit.scss | 11 +++
.../1985/01/01/old-entry1/index.html | 4 +
.../1985/01/01/old-entry10/index.html | 4 +
.../1985/01/01/old-entry11/index.html | 4 +
.../1985/01/01/old-entry12/index.html | 4 +
.../1985/01/01/old-entry13/index.html | 4 +
.../1985/01/01/old-entry14/index.html | 4 +
.../1985/01/01/old-entry2/index.html | 4 +
.../1985/01/01/old-entry3/index.html | 4 +
.../1985/01/01/old-entry4/index.html | 4 +
.../1985/01/01/old-entry5/index.html | 4 +
.../1985/01/01/old-entry6/index.html | 4 +
.../1985/01/01/old-entry7/index.html | 4 +
.../1985/01/01/old-entry8/index.html | 4 +
.../1985/01/01/old-entry9/index.html | 4 +
.../02/14/lazyblorg-example-entry/index.html | 4 +
.../comparison/2013/08/22/testid/index.html | 4 +
.../2014/01/30/full-syntax-test/index.html | 4 +
.../09/18/from-nothing-to-done/index.html | 4 +
.../10/31/an-hidden-blog-entry/index.html | 6 +-
.../2016/10/31/my-temporal-article/index.html | 6 +-
.../11/06/sanitization-examples/index.html | 4 +
.../16/empty-language-autotag-page/index.html | 4 +
.../2016/11/27/image-test/index.html | 8 +-
.../2016/11/27/special-characters/index.html | 6 +-
.../2017/01/08/sanitizing-tests/index.html | 4 +
.../2017/09/30/link-test/index.html | 68 ++++++++++++++--
.../2017/09/30/link-test/source.org.txt | 65 +++++++++++++++
.../index.html | 4 +
.../comparison/about/index.html | 4 +
...zyblorg-all.atom_1.0.links-and-content.xml | 71 +++++++++++++++-
...azyblorg-all.atom_1.0.links-and-teaser.xml | 4 +-
.../lazyblorg-all.atom_1.0.links-only.xml | 4 +-
.../how-to-use-public-voit/index.html | 4 +
.../end_to_end_test/comparison/index.html | 2 +-
.../comparison/persistent-entry/index.html | 6 +-
.../comparison/tags/exampletag/index.html | 8 +-
.../comparison/tags/lazyblorg/index.html | 4 +-
.../comparison/tags/mytest/index.html | 4 +-
.../comparison/tags/programming/index.html | 6 +-
.../comparison/tags/testtag1/index.html | 2 +
.../orgfiles/real-world-entries.org | 70 +++++++++++++++-
46 files changed, 552 insertions(+), 33 deletions(-)
diff --git a/README.org b/README.org
index 8890b61..5b3c943 100644
--- a/README.org
+++ b/README.org
@@ -296,6 +296,8 @@ efficiently:
the entry page, navigational pages, and the feeds. This way, you
can publish pages who can only be access by people knowing its URL.
+- Reading time estimations (multi-language) following [[https://github.com/novoid/lazyblorg/issues/47][this feature request]]
+
** FAQs
See https://github.com/novoid/lazyblorg/wiki/FAQs
diff --git a/lib/htmlizer.py b/lib/htmlizer.py
index ccf1229..2788c85 100644
--- a/lib/htmlizer.py
+++ b/lib/htmlizer.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8; mode: python; -*-
-# Time-stamp: <2020-10-03 16:27:56 vk>
+# Time-stamp: <2020-10-03 16:45:42 vk>
import config # lazyblorg-global settings
import sys
@@ -7,6 +7,7 @@
import os
from datetime import datetime
from time import time, localtime, strftime
+from math import ceil # for calculating reading time
import re # RegEx: for parsing/sanitizing
import codecs
from lib.utils import Utils # for guess_language_from_stopword_percentages()
@@ -313,6 +314,28 @@ def _populate_dict_of_tags_with_ids(self, blog_data):
return dict_of_tags_with_ids
+ def _derive_reading_length(self, rawcontent: str) -> int:
+ """
+ Estimates the reading time of an article in whole minutes (rounded up, never less than 1) from its rawcontent.
+
+ Assumption: people are able to read 250 words per minute; a "word" is a whitespace-separated token that passes the filters below.
+
+ See https://github.com/novoid/lazyblorg/issues/47 for the idea and implementation notes.
+ """
+
+ # strip every ":PROPERTIES: ... :END:" drawer (non-greedy, may span lines); heading title lines are not removed by this pattern:
+ rawcontent_without_header: str = re.sub(r':PROPERTIES:.+?:END:\n', '', rawcontent, flags=re.DOTALL)
+
+ # drop tokens (according to split()) containing a digit or "|", then tokens starting with "#+", "-" or ":", as indicators of non-word elements:
+ raw_words: list = [x for x in rawcontent_without_header.split() if not re.match(r'.*[|0123456789].*', x)]
+ raw_words = [x for x in raw_words if not x.startswith(('#+', '-', ':'))]
+
+ minutes: int = ceil(len(raw_words) / 250)
+
+ if minutes == 0:
+ minutes = 1 # even empty articles should take one minute to look at
+ return minutes
+
def _generate_pages_for_tags_persistent_temporal(self, tags):
"""
Method that creates the pages for tag-pages, persistent pages, and temporal pages.
@@ -348,6 +371,10 @@ def _generate_pages_for_tags_persistent_temporal(self, tags):
entry = self.sanitize_and_htmlize_blog_content(entry)
+ # populate reading time indicator:
+ if 'rawcontent' in entry.keys():
+ entry['reading_minutes'] = self._derive_reading_length(entry['rawcontent'])
+
htmlcontent = None
if entry['category'] == config.TAGS:
@@ -475,6 +502,7 @@ def _generate_tag_page(self, entry):
htmlcontent += self._replace_general_article_placeholders(
entry, content)
htmlcontent = self.sanitize_internal_links(htmlcontent)
+ htmlcontent = self._insert_reading_minutes_if_found(entry, htmlcontent)
return htmlfilename, orgfilename, htmlcontent
@@ -1967,6 +1995,53 @@ def _generate_temporal_article(self, entry):
htmlcontent += self._replace_general_article_placeholders(
entry, content)
htmlcontent = self.sanitize_internal_links(htmlcontent)
+ htmlcontent = self._insert_reading_minutes_if_found(entry, htmlcontent)
+
+ return htmlfilename, orgfilename, htmlcontent
+
+ def _insert_reading_minutes_if_found(self, entry, htmlcontent):
+ """
+ Replaces the '#READING-MINUTES-SECTION#' placeholder in htmlcontent with the reading time snippet that fits entry (one/multiple minutes, de/en) and returns the result.
+ If entry has no 'reading_minutes', htmlcontent is returned unmodified; the placeholder is NOT deleted in that case.
+ """
+ content = ''
+ if 'reading_minutes' in entry.keys():
+ if '#READING-MINUTES-SECTION#' in htmlcontent:
+ # build the template snippet name: reading-time-<one-minute|multiple-minutes>-<de|en>
+ snippetname = 'reading-time-'
+
+ # singular and plural wording live in separate snippets:
+ if entry['reading_minutes'] == 1:
+ snippetname += 'one-minute-'
+ else:
+ snippetname += 'multiple-minutes-'
+
+ # language suffix: 'deutsch' selects the German snippet, any other value falls back to English:
+ if entry['autotags']['language'] == 'deutsch':
+ # FIXXME: languages other than German have to be added
+ # here: (generalize using a configured list of known
+ # languages?)
+ snippetname += 'de'
+ else:
+ snippetname += 'en'
+
+ # put the snippet text in place of the placeholder:
+ content = htmlcontent.replace('#READING-MINUTES-SECTION#', self.template_definition_by_name(snippetname))
+ # resolve placeholders such as #READINGMINUTES# inside the inserted snippet:
+ content = self._replace_general_article_placeholders(entry, content)
+ return content
+ else:
+ # an estimate exists but htmlcontent holds no placeholder, so the replace() below changes nothing
+ # NOTE: Should be dead code (the warning says "missing reading minutes" although it is the placeholder that is absent)
+ logging.warning('Entry %s: missing reading minutes, removing snippet' % entry['id'])
+ return htmlcontent.replace('#READING-MINUTES-SECTION#', '')
+ else:
+ # no estimate stored for this entry; that is only expected for auto-generated
+ # tag pages, so warn for every other entry. The HTML is handed back as it came in:
+ if not entry['id'].startswith(self.ID_PREFIX_FOR_EMPTY_TAG_PAGES):
+ logging.warning('Entry %s: missing reading minutes in "entry[]"' % entry['id'])
+ return htmlcontent
+
return htmlfilename, orgfilename, htmlcontent
@@ -2018,6 +2093,7 @@ def _generate_persistent_article(self, entry):
htmlcontent += self._replace_general_article_placeholders(
entry, content)
htmlcontent = self.sanitize_internal_links(htmlcontent)
+ htmlcontent = self._insert_reading_minutes_if_found(entry, htmlcontent)
return htmlfilename, orgfilename, htmlcontent
@@ -2158,6 +2234,9 @@ def _replace_general_article_placeholders(self, entry, template):
self._generate_tag_page_list(
entry['title']))
+ if 'reading_minutes' in entry.keys():
+ content = content.replace('#READINGMINUTES#', str(entry['reading_minutes']))
+
return content
def _generate_top_tag_list(self):
diff --git a/templates/blog-format.org b/templates/blog-format.org
index 3d77b5b..c03a41b 100644
--- a/templates/blog-format.org
+++ b/templates/blog-format.org
@@ -283,6 +283,36 @@ With name/label:
#+END_EXPORT
+*** Reading time indicators
+
+#+NAME: reading-time-one-minute-en
+#+BEGIN_HTML
+
+ Reading time is one minute
+
+#+END_HTML
+
+#+NAME: reading-time-one-minute-de
+#+BEGIN_HTML
+
+ Lesezeit ist eine Minute
+
+#+END_HTML
+
+#+NAME: reading-time-multiple-minutes-en
+#+BEGIN_HTML
+
+ Reading time is #READINGMINUTES# minutes
+
+#+END_HTML
+
+#+NAME: reading-time-multiple-minutes-de
+#+BEGIN_HTML
+
+ Lesezeit ist #READINGMINUTES# Minuten
+
+#+END_HTML
+
*** ignore me
#+NAME:
@@ -542,6 +572,8 @@ With name/label:
#ARTICLE-TITLE#
+ #READING-MINUTES-SECTION#
+
#COMMON-SIDEBAR#
@@ -730,6 +762,8 @@ With name/label:
#ARTICLE-TITLE#
+ #READING-MINUTES-SECTION#
+
#COMMON-SIDEBAR#
@@ -1029,6 +1063,8 @@ Pages that describe a tag. Corresponding Org-mode entries must have:
Tag Page for the Tag "#ARTICLE-TITLE#"
+ #READING-MINUTES-SECTION#
+
#COMMON-SIDEBAR#
diff --git a/templates/public_voit.css b/templates/public_voit.css
index 6e7d30b..405e91a 100644
--- a/templates/public_voit.css
+++ b/templates/public_voit.css
@@ -39,7 +39,7 @@ a {
padding-left: 1em;
padding-right: 1em; }
- .middle_section_wide, header, .published-on, footer, .temporal-article-header-nav, .month-overview-header-nav, .entrypage-article-header-nav, .persistent-article-header-nav {
+ .middle_section_wide, header, .reading-time-section, .published-on, footer, .temporal-article-header-nav, .month-overview-header-nav, .entrypage-article-header-nav, .persistent-article-header-nav {
margin-left: auto;
margin-right: auto;
max-width: 45em; }
@@ -103,7 +103,7 @@ a {
padding-left: 1em;
padding-right: 1em; }
- .middle_section_wide, header, .published-on, footer, .temporal-article-header-nav, .month-overview-header-nav, .entrypage-article-header-nav, .persistent-article-header-nav {
+ .middle_section_wide, header, .reading-time-section, .published-on, footer, .temporal-article-header-nav, .month-overview-header-nav, .entrypage-article-header-nav, .persistent-article-header-nav {
margin-left: auto;
margin-right: auto;
max-width: 45em; }
@@ -139,7 +139,7 @@ a {
padding-left: 1em;
padding-right: 1em; }
- .middle_section_wide, header, .published-on, footer, .temporal-article-header-nav, .month-overview-header-nav, .entrypage-article-header-nav, .persistent-article-header-nav {
+ .middle_section_wide, header, .reading-time-section, .published-on, footer, .temporal-article-header-nav, .month-overview-header-nav, .entrypage-article-header-nav, .persistent-article-header-nav {
margin-left: auto;
margin-right: auto;
max-width: 45em; }
@@ -173,7 +173,7 @@ a {
margin-right: auto;
max-width: 40em; }
- .middle_section_wide, header, .published-on, footer, .temporal-article-header-nav, .month-overview-header-nav, .entrypage-article-header-nav, .persistent-article-header-nav {
+ .middle_section_wide, header, .reading-time-section, .published-on, footer, .temporal-article-header-nav, .month-overview-header-nav, .entrypage-article-header-nav, .persistent-article-header-nav {
margin-left: auto;
margin-right: auto;
max-width: 60em; }
@@ -207,7 +207,7 @@ a {
margin-right: auto;
max-width: 40em; }
- .middle_section_wide, header, .published-on, footer, .temporal-article-header-nav, .month-overview-header-nav, .entrypage-article-header-nav, .persistent-article-header-nav {
+ .middle_section_wide, header, .reading-time-section, .published-on, footer, .temporal-article-header-nav, .month-overview-header-nav, .entrypage-article-header-nav, .persistent-article-header-nav {
margin-left: auto;
margin-right: auto;
max-width: 60em; }
@@ -452,6 +452,15 @@ figcaption {
color: #252525;
font-size: 80%; }
+.reading-time-section {
+ font-size: 80%;
+ color: gray;
+ text-align: right;
+ margin-left: auto;
+ margin-right: auto;
+ /* padding-top: 0.2em;*/
+ padding-right: 15em; }
+
.published-on {
font-size: 80%;
color: gray;
diff --git a/templates/public_voit.scss b/templates/public_voit.scss
index 25cf1c9..bdeac46 100644
--- a/templates/public_voit.scss
+++ b/templates/public_voit.scss
@@ -512,6 +512,17 @@ figcaption {
font-size: 80%;
}
+.reading-time-section {
+ @extend .middle_section_wide;
+ font-size: 80%;
+ color: gray;
+ text-align: right;
+ margin-left: auto;
+ margin-right: auto;
+ /* padding-top: 0.2em;*/
+ padding-right: 15em;
+}
+
.published-on {
@extend .middle_section_wide;
font-size: 80%;
diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry1/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry1/index.html
index ba25d7f..c47e8ac 100644
--- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry1/index.html
+++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry1/index.html
@@ -59,6 +59,10 @@
old blog entry1
+
+ Reading time is one minute
+
+
diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry10/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry10/index.html
index 61759fd..5ea8141 100644
--- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry10/index.html
+++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry10/index.html
@@ -59,6 +59,10 @@
old blog entry10
+
+ Reading time is one minute
+
+
diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry11/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry11/index.html
index 6d9d6fe..8dd69dc 100644
--- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry11/index.html
+++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry11/index.html
@@ -59,6 +59,10 @@
old blog entry11
+
+ Reading time is one minute
+
+
diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry12/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry12/index.html
index 90d960d..d4c522a 100644
--- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry12/index.html
+++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry12/index.html
@@ -59,6 +59,10 @@
old blog entry12
+
+ Reading time is one minute
+
+
diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry13/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry13/index.html
index fbd7495..e914bcc 100644
--- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry13/index.html
+++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry13/index.html
@@ -59,6 +59,10 @@
old blog entry13
+
+ Reading time is one minute
+
+
diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry14/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry14/index.html
index ed54f13..dee5eff 100644
--- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry14/index.html
+++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry14/index.html
@@ -59,6 +59,10 @@
old blog entry14
+
+ Reading time is one minute
+
+
diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry2/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry2/index.html
index cf9f6d3..f2e38c8 100644
--- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry2/index.html
+++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry2/index.html
@@ -59,6 +59,10 @@
old blog entry2
+
+ Reading time is one minute
+
+
diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry3/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry3/index.html
index f44fbf1..35c98b1 100644
--- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry3/index.html
+++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry3/index.html
@@ -59,6 +59,10 @@
old blog entry3
+
+ Reading time is one minute
+
+
diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry4/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry4/index.html
index 22a1884..f8a3fac 100644
--- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry4/index.html
+++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry4/index.html
@@ -59,6 +59,10 @@
old blog entry4
+
+ Reading time is one minute
+
+
diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry5/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry5/index.html
index 06435cc..3451b54 100644
--- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry5/index.html
+++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry5/index.html
@@ -59,6 +59,10 @@
old blog entry5
+
+ Reading time is one minute
+
+
diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry6/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry6/index.html
index 63ca416..8a680c1 100644
--- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry6/index.html
+++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry6/index.html
@@ -59,6 +59,10 @@
old blog entry6
+
+ Reading time is one minute
+
+
diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry7/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry7/index.html
index 839ffee..a271eb0 100644
--- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry7/index.html
+++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry7/index.html
@@ -59,6 +59,10 @@
old blog entry7
+
+ Reading time is one minute
+
+
diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry8/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry8/index.html
index 9b7192f..f1fbe8c 100644
--- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry8/index.html
+++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry8/index.html
@@ -59,6 +59,10 @@
old blog entry8
+
+ Reading time is one minute
+
+
diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry9/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry9/index.html
index b2be926..5d6e6a9 100644
--- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry9/index.html
+++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry9/index.html
@@ -59,6 +59,10 @@
old blog entry9
+
+ Reading time is one minute
+
+
diff --git a/testdata/end_to_end_test/comparison/2013/02/14/lazyblorg-example-entry/index.html b/testdata/end_to_end_test/comparison/2013/02/14/lazyblorg-example-entry/index.html
index 51198e6..1ae35e8 100644
--- a/testdata/end_to_end_test/comparison/2013/02/14/lazyblorg-example-entry/index.html
+++ b/testdata/end_to_end_test/comparison/2013/02/14/lazyblorg-example-entry/index.html
@@ -61,6 +61,10 @@
This is an example blog entry
+
+ Reading time is one minute
+
+
diff --git a/testdata/end_to_end_test/comparison/2013/08/22/testid/index.html b/testdata/end_to_end_test/comparison/2013/08/22/testid/index.html
index 4dee982..ece5085 100644
--- a/testdata/end_to_end_test/comparison/2013/08/22/testid/index.html
+++ b/testdata/end_to_end_test/comparison/2013/08/22/testid/index.html
@@ -59,6 +59,10 @@
Mini blog entry
+
+ Reading time is one minute
+
+
diff --git a/testdata/end_to_end_test/comparison/2014/01/30/full-syntax-test/index.html b/testdata/end_to_end_test/comparison/2014/01/30/full-syntax-test/index.html
index fa83923..c9e8be8 100644
--- a/testdata/end_to_end_test/comparison/2014/01/30/full-syntax-test/index.html
+++ b/testdata/end_to_end_test/comparison/2014/01/30/full-syntax-test/index.html
@@ -63,6 +63,10 @@
lazyblorg: Syntax-tests of the Currently Supported Org mode Syntax Elements from the Parser and HTMLizer
+
+ Reading time is 9 minutes
+
+
diff --git a/testdata/end_to_end_test/comparison/2016/09/18/from-nothing-to-done/index.html b/testdata/end_to_end_test/comparison/2016/09/18/from-nothing-to-done/index.html
index 19f569b..9dc8e5b 100644
--- a/testdata/end_to_end_test/comparison/2016/09/18/from-nothing-to-done/index.html
+++ b/testdata/end_to_end_test/comparison/2016/09/18/from-nothing-to-done/index.html
@@ -60,6 +60,10 @@
from nothing to DONE
+
+ Reading time is one minute
+
+
diff --git a/testdata/end_to_end_test/comparison/2016/10/31/an-hidden-blog-entry/index.html b/testdata/end_to_end_test/comparison/2016/10/31/an-hidden-blog-entry/index.html
index 6e5bdbc..6642abc 100644
--- a/testdata/end_to_end_test/comparison/2016/10/31/an-hidden-blog-entry/index.html
+++ b/testdata/end_to_end_test/comparison/2016/10/31/an-hidden-blog-entry/index.html
@@ -61,6 +61,10 @@
This Is An Hidden Article
+
+ Reading time is one minute
+
+
@@ -122,7 +126,7 @@ This Is An Hidden Article
Related articles that link to this one:
diff --git a/testdata/end_to_end_test/comparison/2016/10/31/my-temporal-article/index.html b/testdata/end_to_end_test/comparison/2016/10/31/my-temporal-article/index.html
index b80054a..4ea4154 100644
--- a/testdata/end_to_end_test/comparison/2016/10/31/my-temporal-article/index.html
+++ b/testdata/end_to_end_test/comparison/2016/10/31/my-temporal-article/index.html
@@ -60,6 +60,10 @@
A Temporal Article With Links
+
+ Reading time is one minute
+
+
@@ -133,7 +137,7 @@ A Temporal Article With Links
Related articles that link to this one:
diff --git a/testdata/end_to_end_test/comparison/2016/11/06/sanitization-examples/index.html b/testdata/end_to_end_test/comparison/2016/11/06/sanitization-examples/index.html
index e3d23b4..fd3d94f 100644
--- a/testdata/end_to_end_test/comparison/2016/11/06/sanitization-examples/index.html
+++ b/testdata/end_to_end_test/comparison/2016/11/06/sanitization-examples/index.html
@@ -60,6 +60,10 @@
Testing Misc Sanitization Issues
+
+ Reading time is one minute
+
+
diff --git a/testdata/end_to_end_test/comparison/2016/11/16/empty-language-autotag-page/index.html b/testdata/end_to_end_test/comparison/2016/11/16/empty-language-autotag-page/index.html
index fc7b0dc..ea181a5 100644
--- a/testdata/end_to_end_test/comparison/2016/11/16/empty-language-autotag-page/index.html
+++ b/testdata/end_to_end_test/comparison/2016/11/16/empty-language-autotag-page/index.html
@@ -60,6 +60,10 @@
Empty Autotag Page
+
+ Reading time is one minute
+
+
diff --git a/testdata/end_to_end_test/comparison/2016/11/27/image-test/index.html b/testdata/end_to_end_test/comparison/2016/11/27/image-test/index.html
index 3fee8ea..a6a28a4 100644
--- a/testdata/end_to_end_test/comparison/2016/11/27/image-test/index.html
+++ b/testdata/end_to_end_test/comparison/2016/11/27/image-test/index.html
@@ -60,6 +60,10 @@
Image Test and Language Tag Override
+
+ Lesezeit ist eine Minute
+
+
@@ -174,7 +178,7 @@ Image Test and Language Tag Override
-This is my caption
+This is my caption (klicken für größere Version)
@@ -185,7 +189,7 @@
Image Test and Language Tag Override
-This is my caption
+This is my caption (klicken für größere Version)
diff --git a/testdata/end_to_end_test/comparison/2016/11/27/special-characters/index.html b/testdata/end_to_end_test/comparison/2016/11/27/special-characters/index.html
index d25ee7a..cfcb620 100644
--- a/testdata/end_to_end_test/comparison/2016/11/27/special-characters/index.html
+++ b/testdata/end_to_end_test/comparison/2016/11/27/special-characters/index.html
@@ -60,6 +60,10 @@
Special Characters and Language Tag Override
+
+ Lesezeit ist eine Minute
+
+
@@ -114,7 +118,7 @@ Special Characters and Language Tag Over
Ähnliche Beiträge, die hierher zeigen:
diff --git a/testdata/end_to_end_test/comparison/2017/01/08/sanitizing-tests/index.html b/testdata/end_to_end_test/comparison/2017/01/08/sanitizing-tests/index.html
index fc49486..f95ba22 100644
--- a/testdata/end_to_end_test/comparison/2017/01/08/sanitizing-tests/index.html
+++ b/testdata/end_to_end_test/comparison/2017/01/08/sanitizing-tests/index.html
@@ -60,6 +60,10 @@
Sanitizing Elements
+
+ Reading time is one minute
+
+
diff --git a/testdata/end_to_end_test/comparison/2017/09/30/link-test/index.html b/testdata/end_to_end_test/comparison/2017/09/30/link-test/index.html
index 8d89ae5..cdd885c 100644
--- a/testdata/end_to_end_test/comparison/2017/09/30/link-test/index.html
+++ b/testdata/end_to_end_test/comparison/2017/09/30/link-test/index.html
@@ -5,19 +5,19 @@
-
+
-
+
-
+
@@ -36,7 +36,7 @@
- Links test
+ Links test and reading time over one minute
@@ -61,7 +61,11 @@
- Links test
+ Links test and reading time over one minute
+
+
+ Reading time is 3 minutes
+
@@ -138,6 +142,58 @@ Links test
What about a hidden entry ?
+
+
+ Some test to get the reading time to exceed one minute
+
+
+
+The reading time estimation feature is a very nice touch for the reader in order to get a rough figure how many minutes it takes to read an article.
+
+
+
+
+
+This way, she is able to determine if it is worth the time or not. Having such an indicator is almost standard in nowaday's web culture.
+
+
+
+
+
+My assumption is, that the reader is able to read a certain amount of words per minute. The initial value was chosen with 250. So maybe this is not adequate - let's see. Furthermore, I had to derive a heuristic algorithm to filter out non-word noise of the original Org mode source file. Those filtered things might contain real word (false positives) and the rest could contain non-words (false negatives). On average, this should not be a big issue. At least, this is a better approximation than counting all characters and assuming an average word consisting of five characters as I've seen in another algorithm. First, I would have had issues with filtering non-word content. Secondly, different languages do have different average sizes of words. So let's hope this is a valueable contribution to lazyblorg.
+
+
+
+
+
+In order to get the reading length high, here is some further dummy text as well:
+
+
+
+
+
+Aliquam erat volutpat. Nunc eleifend leo vitae magna. In id erat non orci commodo lobortis. Proin neque massa, cursus ut, gravida ut, lobortis eget, lacus. Sed diam. Praesent fermentum tempor tellus. Nullam tempus. Mauris ac felis vel velit tristique imperdiet. Donec at pede. Etiam vel neque nec dui dignissim bibendum. Vivamus id enim. Phasellus neque orci, porta a, aliquet quis, semper a, massa. Phasellus purus. Pellentesque tristique imperdiet tortor. Nam euismod tellus id erat.
+
+
+
+
+
+Aliquam erat volutpat. Nunc eleifend leo vitae magna. In id erat non orci commodo lobortis. Proin neque massa, cursus ut, gravida ut, lobortis eget, lacus. Sed diam. Praesent fermentum tempor tellus. Nullam tempus. Mauris ac felis vel velit tristique imperdiet. Donec at pede. Etiam vel neque nec dui dignissim bibendum. Vivamus id enim. Phasellus neque orci, porta a, aliquet quis, semper a, massa. Phasellus purus. Pellentesque tristique imperdiet tortor. Nam euismod tellus id erat.
+
+
+
+
+
+Aliquam erat volutpat. Nunc eleifend leo vitae magna. In id erat non orci commodo lobortis. Proin neque massa, cursus ut, gravida ut, lobortis eget, lacus. Sed diam. Praesent fermentum tempor tellus. Nullam tempus. Mauris ac felis vel velit tristique imperdiet. Donec at pede. Etiam vel neque nec dui dignissim bibendum. Vivamus id enim. Phasellus neque orci, porta a, aliquet quis, semper a, massa. Phasellus purus. Pellentesque tristique imperdiet tortor. Nam euismod tellus id erat.
+
+
+
+ An almost empty heading at the end
+
+
+
+This is to test the reading length estimation algorithm with a property drawer at the end.
+
@@ -145,7 +201,7 @@ Links test
Related articles that link to this one:
diff --git a/testdata/end_to_end_test/comparison/2017/09/30/link-test/source.org.txt b/testdata/end_to_end_test/comparison/2017/09/30/link-test/source.org.txt
index 51c9052..a00998a 100644
--- a/testdata/end_to_end_test/comparison/2017/09/30/link-test/source.org.txt
+++ b/testdata/end_to_end_test/comparison/2017/09/30/link-test/source.org.txt
@@ -22,6 +22,71 @@ Here [[id:2016-10-31-a-tag-page][in a quote]] block.
Here is [[id:2017-09-30-link-test][a self-reference]] which should not result in a backlink to itself.
*** Even [[id:2016-10-31-my-temporal-article][a heading should]] work
+:PROPERTIES:
+:END:
What about [[id:2016-10-31-an-hidden-blog-entry][a hidden entry]]?
+*** Some test to get the reading time to exceed one minute
+:PROPERTIES:
+:END:
+
+The [[https://github.com/novoid/lazyblorg/issues/47][reading time estimation feature]] is a very nice touch for the
+reader in order to get a rough figure how many minutes it takes to
+read an article.
+
+This way, she is able to determine if it is worth the time or not.
+Having such an indicator is almost standard in nowaday's web culture.
+
+My assumption is, that the reader is able to read a certain amount of
+words per minute. The initial value was chosen with 250. So maybe this
+is not adequate - let's see. Furthermore, I had to derive a heuristic
+algorithm to filter out non-word noise of the original Org mode source
+file. Those filtered things might contain real word (false positives)
+and the rest could contain non-words (false negatives). On average,
+this should not be a big issue. At least, this is a better
+approximation than counting all characters and assuming an average
+word consisting of five characters as I've seen in another algorithm.
+First, I would have had issues with filtering non-word content.
+Secondly, different languages do have different average sizes of
+words. So let's hope this is a valueable contribution to lazyblorg.
+
+------------
+
+In order to get the reading length high, here is some further dummy
+text as well:
+
+Aliquam erat volutpat. Nunc eleifend leo vitae magna. In id erat non
+orci commodo lobortis. Proin neque massa, cursus ut, gravida ut,
+lobortis eget, lacus. Sed diam. Praesent fermentum tempor tellus.
+Nullam tempus. Mauris ac felis vel velit tristique imperdiet. Donec at
+pede. Etiam vel neque nec dui dignissim bibendum. Vivamus id enim.
+Phasellus neque orci, porta a, aliquet quis, semper a, massa.
+Phasellus purus. Pellentesque tristique imperdiet tortor. Nam euismod
+tellus id erat.
+
+Aliquam erat volutpat. Nunc eleifend leo vitae magna. In id erat non
+orci commodo lobortis. Proin neque massa, cursus ut, gravida ut,
+lobortis eget, lacus. Sed diam. Praesent fermentum tempor tellus.
+Nullam tempus. Mauris ac felis vel velit tristique imperdiet. Donec at
+pede. Etiam vel neque nec dui dignissim bibendum. Vivamus id enim.
+Phasellus neque orci, porta a, aliquet quis, semper a, massa.
+Phasellus purus. Pellentesque tristique imperdiet tortor. Nam euismod
+tellus id erat.
+
+Aliquam erat volutpat. Nunc eleifend leo vitae magna. In id erat non
+orci commodo lobortis. Proin neque massa, cursus ut, gravida ut,
+lobortis eget, lacus. Sed diam. Praesent fermentum tempor tellus.
+Nullam tempus. Mauris ac felis vel velit tristique imperdiet. Donec at
+pede. Etiam vel neque nec dui dignissim bibendum. Vivamus id enim.
+Phasellus neque orci, porta a, aliquet quis, semper a, massa.
+Phasellus purus. Pellentesque tristique imperdiet tortor. Nam euismod
+tellus id erat.
+
+*** An almost empty heading at the end
+:PROPERTIES:
+:END:
+
+This is to test the reading length estimation algorithm with a
+property drawer at the end.
+
diff --git a/testdata/end_to_end_test/comparison/2020/10/02/Heading-which-starts-with-a-list/index.html b/testdata/end_to_end_test/comparison/2020/10/02/Heading-which-starts-with-a-list/index.html
index 275dfca..9b610a4 100644
--- a/testdata/end_to_end_test/comparison/2020/10/02/Heading-which-starts-with-a-list/index.html
+++ b/testdata/end_to_end_test/comparison/2020/10/02/Heading-which-starts-with-a-list/index.html
@@ -59,6 +59,10 @@
Heading which starts with a list
+
+ Reading time is one minute
+
+
diff --git a/testdata/end_to_end_test/comparison/about/index.html b/testdata/end_to_end_test/comparison/about/index.html
index 19eb5e3..f085f44 100644
--- a/testdata/end_to_end_test/comparison/about/index.html
+++ b/testdata/end_to_end_test/comparison/about/index.html
@@ -40,6 +40,10 @@
Placeholder: About
+
+ Reading time is one minute
+
+
diff --git a/testdata/end_to_end_test/comparison/feeds/lazyblorg-all.atom_1.0.links-and-content.xml b/testdata/end_to_end_test/comparison/feeds/lazyblorg-all.atom_1.0.links-and-content.xml
index 1c078fd..9ea386a 100644
--- a/testdata/end_to_end_test/comparison/feeds/lazyblorg-all.atom_1.0.links-and-content.xml
+++ b/testdata/end_to_end_test/comparison/feeds/lazyblorg-all.atom_1.0.links-and-content.xml
@@ -7,7 +7,7 @@
http://Karl-Voit.at/
/favicon.ico
- 2020-10-02T19:29:42+01:00
+ 2020-10-03T16:36:41+01:00
Karl Voit
@@ -44,7 +44,7 @@
- Links test
+ Links test and reading time over one minute
2017-09-30T17:05:00+01:00
2017-09-30T17:05:00+01:00
@@ -138,6 +138,69 @@ Here is a self-reference which
What about a hidden entry ?
+
+
+
+ Some test to get the reading time to exceed one minute
+
+
+
+
+The reading time estimation feature is a very nice touch for the reader in order to get a rough figure how many minutes it takes to read an article.
+
+
+
+
+
+
+This way, she is able to determine if it is worth the time or not. Having such an indicator is almost standard in nowaday's web culture.
+
+
+
+
+
+
+My assumption is, that the reader is able to read a certain amount of words per minute. The initial value was chosen with 250. So maybe this is not adequate - let's see. Furthermore, I had to derive a heuristic algorithm to filter out non-word noise of the original Org mode source file. Those filtered things might contain real word (false positives) and the rest could contain non-words (false negatives). On average, this should not be a big issue. At least, this is a better approximation than counting all characters and assuming an average word consisting of five characters as I've seen in another algorithm. First, I would have had issues with filtering non-word content. Secondly, different languages do have different average sizes of words. So let's hope this is a valueable contribution to lazyblorg.
+
+
+
+
+
+
+
+In order to get the reading length high, here is some further dummy text as well:
+
+
+
+
+
+
+Aliquam erat volutpat. Nunc eleifend leo vitae magna. In id erat non orci commodo lobortis. Proin neque massa, cursus ut, gravida ut, lobortis eget, lacus. Sed diam. Praesent fermentum tempor tellus. Nullam tempus. Mauris ac felis vel velit tristique imperdiet. Donec at pede. Etiam vel neque nec dui dignissim bibendum. Vivamus id enim. Phasellus neque orci, porta a, aliquet quis, semper a, massa. Phasellus purus. Pellentesque tristique imperdiet tortor. Nam euismod tellus id erat.
+
+
+
+
+
+
+Aliquam erat volutpat. Nunc eleifend leo vitae magna. In id erat non orci commodo lobortis. Proin neque massa, cursus ut, gravida ut, lobortis eget, lacus. Sed diam. Praesent fermentum tempor tellus. Nullam tempus. Mauris ac felis vel velit tristique imperdiet. Donec at pede. Etiam vel neque nec dui dignissim bibendum. Vivamus id enim. Phasellus neque orci, porta a, aliquet quis, semper a, massa. Phasellus purus. Pellentesque tristique imperdiet tortor. Nam euismod tellus id erat.
+
+
+
+
+
+
+Aliquam erat volutpat. Nunc eleifend leo vitae magna. In id erat non orci commodo lobortis. Proin neque massa, cursus ut, gravida ut, lobortis eget, lacus. Sed diam. Praesent fermentum tempor tellus. Nullam tempus. Mauris ac felis vel velit tristique imperdiet. Donec at pede. Etiam vel neque nec dui dignissim bibendum. Vivamus id enim. Phasellus neque orci, porta a, aliquet quis, semper a, massa. Phasellus purus. Pellentesque tristique imperdiet tortor. Nam euismod tellus id erat.
+
+
+
+
+ An almost empty heading at the end
+
+
+
+
+This is to test the reading length estimation algorithm with a property drawer at the end.
+
@@ -414,7 +477,7 @@ With caption, alt, right alined and 300px width, linked-image-width 560:
-This is my caption
+This is my caption (klicken für größere Version)
@@ -427,7 +490,7 @@ With caption, alt, right alined and 300px width, linked-image-width "original":
-This is my caption
+This is my caption (klicken für größere Version)
diff --git a/testdata/end_to_end_test/comparison/feeds/lazyblorg-all.atom_1.0.links-and-teaser.xml b/testdata/end_to_end_test/comparison/feeds/lazyblorg-all.atom_1.0.links-and-teaser.xml
index 543ec33..cde09ef 100644
--- a/testdata/end_to_end_test/comparison/feeds/lazyblorg-all.atom_1.0.links-and-teaser.xml
+++ b/testdata/end_to_end_test/comparison/feeds/lazyblorg-all.atom_1.0.links-and-teaser.xml
@@ -7,7 +7,7 @@
http://Karl-Voit.at/
/favicon.ico
- 2020-10-02T19:29:42+01:00
+ 2020-10-03T16:36:41+01:00
Karl Voit
@@ -34,7 +34,7 @@
- Links test
+ Links test and reading time over one minute
2017-09-30T17:05:00+01:00
2017-09-30T17:05:00+01:00
diff --git a/testdata/end_to_end_test/comparison/feeds/lazyblorg-all.atom_1.0.links-only.xml b/testdata/end_to_end_test/comparison/feeds/lazyblorg-all.atom_1.0.links-only.xml
index d8240d2..26902a6 100644
--- a/testdata/end_to_end_test/comparison/feeds/lazyblorg-all.atom_1.0.links-only.xml
+++ b/testdata/end_to_end_test/comparison/feeds/lazyblorg-all.atom_1.0.links-only.xml
@@ -7,7 +7,7 @@
http://Karl-Voit.at/
/favicon.ico
- 2020-10-02T19:29:42+01:00
+ 2020-10-03T16:36:41+01:00
Karl Voit
@@ -25,7 +25,7 @@
- Links test
+ Links test and reading time over one minute
2017-09-30T17:05:00+01:00
2017-09-30T17:05:00+01:00
diff --git a/testdata/end_to_end_test/comparison/how-to-use-public-voit/index.html b/testdata/end_to_end_test/comparison/how-to-use-public-voit/index.html
index f385b2e..63f4c78 100644
--- a/testdata/end_to_end_test/comparison/how-to-use-public-voit/index.html
+++ b/testdata/end_to_end_test/comparison/how-to-use-public-voit/index.html
@@ -39,6 +39,10 @@
Placeholder: How to Use This Blog Efficiently
+
+ Reading time is one minute
+
+
diff --git a/testdata/end_to_end_test/comparison/index.html b/testdata/end_to_end_test/comparison/index.html
index 2b0ade6..c8c852e 100644
--- a/testdata/end_to_end_test/comparison/index.html
+++ b/testdata/end_to_end_test/comparison/index.html
@@ -139,7 +139,7 @@
-
+
diff --git a/testdata/end_to_end_test/comparison/persistent-entry/index.html b/testdata/end_to_end_test/comparison/persistent-entry/index.html
index e40b4c6..16f08fc 100644
--- a/testdata/end_to_end_test/comparison/persistent-entry/index.html
+++ b/testdata/end_to_end_test/comparison/persistent-entry/index.html
@@ -42,6 +42,10 @@
A Wonderful Persistent Blog Entry
+
+ Reading time is one minute
+
+
@@ -96,7 +100,7 @@ A Wonderful Persistent Blog Entry
Related articles that link to this one: