From 9244a7f5f0b92f9b7974572b10653b207edc6c58 Mon Sep 17 00:00:00 2001 From: Karl Voit Date: Sat, 3 Oct 2020 16:53:10 +0200 Subject: [PATCH] Reading time estimations, fixing #47 --- README.org | 2 + lib/htmlizer.py | 81 ++++++++++++++++++- templates/blog-format.org | 36 +++++++++ templates/public_voit.css | 19 +++-- templates/public_voit.scss | 11 +++ .../1985/01/01/old-entry1/index.html | 4 + .../1985/01/01/old-entry10/index.html | 4 + .../1985/01/01/old-entry11/index.html | 4 + .../1985/01/01/old-entry12/index.html | 4 + .../1985/01/01/old-entry13/index.html | 4 + .../1985/01/01/old-entry14/index.html | 4 + .../1985/01/01/old-entry2/index.html | 4 + .../1985/01/01/old-entry3/index.html | 4 + .../1985/01/01/old-entry4/index.html | 4 + .../1985/01/01/old-entry5/index.html | 4 + .../1985/01/01/old-entry6/index.html | 4 + .../1985/01/01/old-entry7/index.html | 4 + .../1985/01/01/old-entry8/index.html | 4 + .../1985/01/01/old-entry9/index.html | 4 + .../02/14/lazyblorg-example-entry/index.html | 4 + .../comparison/2013/08/22/testid/index.html | 4 + .../2014/01/30/full-syntax-test/index.html | 4 + .../09/18/from-nothing-to-done/index.html | 4 + .../10/31/an-hidden-blog-entry/index.html | 6 +- .../2016/10/31/my-temporal-article/index.html | 6 +- .../11/06/sanitization-examples/index.html | 4 + .../16/empty-language-autotag-page/index.html | 4 + .../2016/11/27/image-test/index.html | 8 +- .../2016/11/27/special-characters/index.html | 6 +- .../2017/01/08/sanitizing-tests/index.html | 4 + .../2017/09/30/link-test/index.html | 68 ++++++++++++++-- .../2017/09/30/link-test/source.org.txt | 65 +++++++++++++++ .../index.html | 4 + .../comparison/about/index.html | 4 + ...zyblorg-all.atom_1.0.links-and-content.xml | 71 +++++++++++++++- ...azyblorg-all.atom_1.0.links-and-teaser.xml | 4 +- .../lazyblorg-all.atom_1.0.links-only.xml | 4 +- .../how-to-use-public-voit/index.html | 4 + .../end_to_end_test/comparison/index.html | 2 +- .../comparison/persistent-entry/index.html | 6 +- .../comparison/tags/exampletag/index.html | 8 +- .../comparison/tags/lazyblorg/index.html | 4 +- .../comparison/tags/mytest/index.html | 4 +- .../comparison/tags/programming/index.html | 6 +- .../comparison/tags/testtag1/index.html | 2 + .../orgfiles/real-world-entries.org | 70 +++++++++++++++- 46 files changed, 552 insertions(+), 33 deletions(-) diff --git a/README.org b/README.org index 8890b61..5b3c943 100644 --- a/README.org +++ b/README.org @@ -296,6 +296,8 @@ efficiently: the entry page, navigational pages, and the feeds. This way, you can publish pages who can only be access by people knowing its URL. +- Reading time estimations (multi-language) following [[https://github.com/novoid/lazyblorg/issues/47][this feature request]] + ** FAQs See https://github.com/novoid/lazyblorg/wiki/FAQs diff --git a/lib/htmlizer.py b/lib/htmlizer.py index ccf1229..2788c85 100644 --- a/lib/htmlizer.py +++ b/lib/htmlizer.py @@ -1,5 +1,5 @@ # -*- coding: utf-8; mode: python; -*- -# Time-stamp: <2020-10-03 16:27:56 vk> +# Time-stamp: <2020-10-03 16:45:42 vk> import config # lazyblorg-global settings import sys @@ -7,6 +7,7 @@ import os from datetime import datetime from time import time, localtime, strftime +from math import ceil # for calculating reading time import re # RegEx: for parsing/sanitizing import codecs from lib.utils import Utils # for guess_language_from_stopword_percentages() @@ -313,6 +314,28 @@ def _populate_dict_of_tags_with_ids(self, blog_data): return dict_of_tags_with_ids + def _derive_reading_length(self, rawcontent: str) -> int: + """ + Determines the number of minutes reading time from the rawcontent of the article. + + Assumption: people are able to read 250 words per minute. + + See https://github.com/novoid/lazyblorg/issues/47 for the idea and implementation notes. + """ + + # remove heading title and drawer in order to get body of content: + rawcontent_without_header: str = re.sub(r':PROPERTIES:.+?:END:\n', '', rawcontent, flags=re.DOTALL) + + # remove all "words" (according to split()) which contains numbers or other characters that are indicators of non-word elements: + raw_words: list = [x for x in rawcontent_without_header.split() if not re.match(r'.*[|0123456789].*', x)] + raw_words = [x for x in raw_words if not x.startswith(('#+', '-', ':'))] + + minutes: int = ceil(len(raw_words) / 250) + + if minutes == 0: + minutes = 1 # even empty articles should take one minute to watch at + return minutes + def _generate_pages_for_tags_persistent_temporal(self, tags): """ Method that creates the pages for tag-pages, persistent pages, and temporal pages. @@ -348,6 +371,10 @@ def _generate_pages_for_tags_persistent_temporal(self, tags): entry = self.sanitize_and_htmlize_blog_content(entry) + # populate reading time indicator: + if 'rawcontent' in entry.keys(): + entry['reading_minutes'] = self._derive_reading_length(entry['rawcontent']) + htmlcontent = None if entry['category'] == config.TAGS: @@ -475,6 +502,7 @@ def _generate_tag_page(self, entry): htmlcontent += self._replace_general_article_placeholders( entry, content) htmlcontent = self.sanitize_internal_links(htmlcontent) + htmlcontent = self._insert_reading_minutes_if_found(entry, htmlcontent) return htmlfilename, orgfilename, htmlcontent @@ -1967,6 +1995,53 @@ def _generate_temporal_article(self, entry): htmlcontent += self._replace_general_article_placeholders( entry, content) htmlcontent = self.sanitize_internal_links(htmlcontent) + htmlcontent = self._insert_reading_minutes_if_found(entry, htmlcontent) + + return htmlfilename, orgfilename, htmlcontent + + def _insert_reading_minutes_if_found(self, entry, htmlcontent): + """ + Handles the snippet that contains the estimation for the reading minutes. + Deletes the snippet of the template if none found. + """ + content = '' + if 'reading_minutes' in entry.keys(): + if '#READING-MINUTES-SECTION#' in htmlcontent: + # insert snippet + snippetname = 'reading-time-' + + # handle one or many minutes: (I do have different snippets for those cases) + if entry['reading_minutes'] == 1: + snippetname += 'one-minute-' + else: + snippetname += 'multiple-minutes-' + + # handle different languages: + if entry['autotags']['language'] == 'deutsch': + # FIXXME: other languages than german have to be added + # here: (generalize using a configured list of known + # languages?) + snippetname += 'de' + else: + snippetname += 'en' + + # insert snippet: + content = htmlcontent.replace('#READING-MINUTES-SECTION#', self.template_definition_by_name(snippetname)) + # replace actual minutes (if found): + content = self._replace_general_article_placeholders(entry, content) + return content + else: + # remove template snippet because we've got no minutes to insert + # NOTE: Should be dead code + logging.warning('Entry %s: missing reading minutes, removing snippet' % entry['id']) + return htmlcontent.replace('#READING-MINUTES-SECTION#', '') + else: + # missing reading minutes should only be OK with + # auto-generated tag pages. Report error if otherwise: + if not entry['id'].startswith(self.ID_PREFIX_FOR_EMPTY_TAG_PAGES): + logging.warning('Entry %s: missing reading minutes in "entry[]"' % entry['id']) + return htmlcontent + return htmlfilename, orgfilename, htmlcontent @@ -2018,6 +2093,7 @@ def _generate_persistent_article(self, entry): htmlcontent += self._replace_general_article_placeholders( entry, content) htmlcontent = self.sanitize_internal_links(htmlcontent) + htmlcontent = self._insert_reading_minutes_if_found(entry, htmlcontent) return htmlfilename, orgfilename, htmlcontent @@ -2158,6 +2234,9 @@ def _replace_general_article_placeholders(self, entry, template): self._generate_tag_page_list( entry['title'])) + if 'reading_minutes' in entry.keys(): + content = content.replace('#READINGMINUTES#', str(entry['reading_minutes'])) + return content def _generate_top_tag_list(self): diff --git a/templates/blog-format.org b/templates/blog-format.org index 3d77b5b..c03a41b 100644 --- a/templates/blog-format.org +++ b/templates/blog-format.org @@ -283,6 +283,36 @@ With name/label: #+END_EXPORT +*** Reading time indicators + +#+NAME: reading-time-one-minute-en +#+BEGIN_HTML + +#+END_HTML + +#+NAME: reading-time-one-minute-de +#+BEGIN_HTML + +#+END_HTML + +#+NAME: reading-time-multiple-minutes-en +#+BEGIN_HTML + +#+END_HTML + +#+NAME: reading-time-multiple-minutes-de +#+BEGIN_HTML + +#+END_HTML + *** ignore me #+NAME: @@ -542,6 +572,8 @@ With name/label:

#ARTICLE-TITLE#

+ #READING-MINUTES-SECTION# + #COMMON-SIDEBAR# @@ -730,6 +762,8 @@ With name/label:

#ARTICLE-TITLE#

+ #READING-MINUTES-SECTION# + #COMMON-SIDEBAR# @@ -1029,6 +1063,8 @@ Pages that describe a tag. Corresponding Org-mode entries must have:

Tag Page for the Tag "#ARTICLE-TITLE#"

+ #READING-MINUTES-SECTION# + #COMMON-SIDEBAR# diff --git a/templates/public_voit.css b/templates/public_voit.css index 6e7d30b..405e91a 100644 --- a/templates/public_voit.css +++ b/templates/public_voit.css @@ -39,7 +39,7 @@ a { padding-left: 1em; padding-right: 1em; } - .middle_section_wide, header, .published-on, footer, .temporal-article-header-nav, .month-overview-header-nav, .entrypage-article-header-nav, .persistent-article-header-nav { + .middle_section_wide, header, .reading-time-section, .published-on, footer, .temporal-article-header-nav, .month-overview-header-nav, .entrypage-article-header-nav, .persistent-article-header-nav { margin-left: auto; margin-right: auto; max-width: 45em; } @@ -103,7 +103,7 @@ a { padding-left: 1em; padding-right: 1em; } - .middle_section_wide, header, .published-on, footer, .temporal-article-header-nav, .month-overview-header-nav, .entrypage-article-header-nav, .persistent-article-header-nav { + .middle_section_wide, header, .reading-time-section, .published-on, footer, .temporal-article-header-nav, .month-overview-header-nav, .entrypage-article-header-nav, .persistent-article-header-nav { margin-left: auto; margin-right: auto; max-width: 45em; } @@ -139,7 +139,7 @@ a { padding-left: 1em; padding-right: 1em; } - .middle_section_wide, header, .published-on, footer, .temporal-article-header-nav, .month-overview-header-nav, .entrypage-article-header-nav, .persistent-article-header-nav { + .middle_section_wide, header, .reading-time-section, .published-on, footer, .temporal-article-header-nav, .month-overview-header-nav, .entrypage-article-header-nav, .persistent-article-header-nav { margin-left: auto; margin-right: auto; max-width: 45em; } @@ -173,7 +173,7 @@ a { margin-right: auto; max-width: 40em; } - .middle_section_wide, header, .published-on, footer, .temporal-article-header-nav, .month-overview-header-nav, .entrypage-article-header-nav, .persistent-article-header-nav { + .middle_section_wide, header, .reading-time-section, .published-on, footer, .temporal-article-header-nav, .month-overview-header-nav, .entrypage-article-header-nav, .persistent-article-header-nav { margin-left: auto; margin-right: auto; max-width: 60em; } @@ -207,7 +207,7 @@ a { margin-right: auto; max-width: 40em; } - .middle_section_wide, header, .published-on, footer, .temporal-article-header-nav, .month-overview-header-nav, .entrypage-article-header-nav, .persistent-article-header-nav { + .middle_section_wide, header, .reading-time-section, .published-on, footer, .temporal-article-header-nav, .month-overview-header-nav, .entrypage-article-header-nav, .persistent-article-header-nav { margin-left: auto; margin-right: auto; max-width: 60em; } @@ -452,6 +452,15 @@ figcaption { color: #252525; font-size: 80%; } +.reading-time-section { + font-size: 80%; + color: gray; + text-align: right; + margin-left: auto; + margin-right: auto; + /* padding-top: 0.2em;*/ + padding-right: 15em; } + .published-on { font-size: 80%; color: gray; diff --git a/templates/public_voit.scss b/templates/public_voit.scss index 25cf1c9..bdeac46 100644 --- a/templates/public_voit.scss +++ b/templates/public_voit.scss @@ -512,6 +512,17 @@ figcaption { font-size: 80%; } +.reading-time-section { + @extend .middle_section_wide; + font-size: 80%; + color: gray; + text-align: right; + margin-left: auto; + margin-right: auto; + /* padding-top: 0.2em;*/ + padding-right: 15em; +} + .published-on { @extend .middle_section_wide; font-size: 80%; diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry1/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry1/index.html index ba25d7f..c47e8ac 100644 --- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry1/index.html +++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry1/index.html @@ -59,6 +59,10 @@

old blog entry1

+ + diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry10/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry10/index.html index 61759fd..5ea8141 100644 --- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry10/index.html +++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry10/index.html @@ -59,6 +59,10 @@

old blog entry10

+ + diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry11/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry11/index.html index 6d9d6fe..8dd69dc 100644 --- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry11/index.html +++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry11/index.html @@ -59,6 +59,10 @@

old blog entry11

+ + diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry12/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry12/index.html index 90d960d..d4c522a 100644 --- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry12/index.html +++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry12/index.html @@ -59,6 +59,10 @@

old blog entry12

+ + diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry13/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry13/index.html index fbd7495..e914bcc 100644 --- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry13/index.html +++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry13/index.html @@ -59,6 +59,10 @@

old blog entry13

+ + diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry14/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry14/index.html index ed54f13..dee5eff 100644 --- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry14/index.html +++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry14/index.html @@ -59,6 +59,10 @@

old blog entry14

+ + diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry2/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry2/index.html index cf9f6d3..f2e38c8 100644 --- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry2/index.html +++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry2/index.html @@ -59,6 +59,10 @@

old blog entry2

+ + diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry3/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry3/index.html index f44fbf1..35c98b1 100644 --- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry3/index.html +++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry3/index.html @@ -59,6 +59,10 @@

old blog entry3

+ + diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry4/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry4/index.html index 22a1884..f8a3fac 100644 --- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry4/index.html +++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry4/index.html @@ -59,6 +59,10 @@

old blog entry4

+ + diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry5/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry5/index.html index 06435cc..3451b54 100644 --- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry5/index.html +++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry5/index.html @@ -59,6 +59,10 @@

old blog entry5

+ + diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry6/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry6/index.html index 63ca416..8a680c1 100644 --- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry6/index.html +++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry6/index.html @@ -59,6 +59,10 @@

old blog entry6

+ + diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry7/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry7/index.html index 839ffee..a271eb0 100644 --- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry7/index.html +++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry7/index.html @@ -59,6 +59,10 @@

old blog entry7

+ + diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry8/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry8/index.html index 9b7192f..f1fbe8c 100644 --- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry8/index.html +++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry8/index.html @@ -59,6 +59,10 @@

old blog entry8

+ + diff --git a/testdata/end_to_end_test/comparison/1985/01/01/old-entry9/index.html b/testdata/end_to_end_test/comparison/1985/01/01/old-entry9/index.html index b2be926..5d6e6a9 100644 --- a/testdata/end_to_end_test/comparison/1985/01/01/old-entry9/index.html +++ b/testdata/end_to_end_test/comparison/1985/01/01/old-entry9/index.html @@ -59,6 +59,10 @@

old blog entry9

+ + diff --git a/testdata/end_to_end_test/comparison/2013/02/14/lazyblorg-example-entry/index.html b/testdata/end_to_end_test/comparison/2013/02/14/lazyblorg-example-entry/index.html index 51198e6..1ae35e8 100644 --- a/testdata/end_to_end_test/comparison/2013/02/14/lazyblorg-example-entry/index.html +++ b/testdata/end_to_end_test/comparison/2013/02/14/lazyblorg-example-entry/index.html @@ -61,6 +61,10 @@

This is an example blog entry

+ + diff --git a/testdata/end_to_end_test/comparison/2013/08/22/testid/index.html b/testdata/end_to_end_test/comparison/2013/08/22/testid/index.html index 4dee982..ece5085 100644 --- a/testdata/end_to_end_test/comparison/2013/08/22/testid/index.html +++ b/testdata/end_to_end_test/comparison/2013/08/22/testid/index.html @@ -59,6 +59,10 @@

Mini blog entry

+ + diff --git a/testdata/end_to_end_test/comparison/2014/01/30/full-syntax-test/index.html b/testdata/end_to_end_test/comparison/2014/01/30/full-syntax-test/index.html index fa83923..c9e8be8 100644 --- a/testdata/end_to_end_test/comparison/2014/01/30/full-syntax-test/index.html +++ b/testdata/end_to_end_test/comparison/2014/01/30/full-syntax-test/index.html @@ -63,6 +63,10 @@

lazyblorg: Syntax-tests of the Currently Supported Org mode Syntax Elements from the Parser and HTMLizer

+ + diff --git a/testdata/end_to_end_test/comparison/2016/09/18/from-nothing-to-done/index.html b/testdata/end_to_end_test/comparison/2016/09/18/from-nothing-to-done/index.html index 19f569b..9dc8e5b 100644 --- a/testdata/end_to_end_test/comparison/2016/09/18/from-nothing-to-done/index.html +++ b/testdata/end_to_end_test/comparison/2016/09/18/from-nothing-to-done/index.html @@ -60,6 +60,10 @@

from nothing to DONE

+ + diff --git a/testdata/end_to_end_test/comparison/2016/10/31/an-hidden-blog-entry/index.html b/testdata/end_to_end_test/comparison/2016/10/31/an-hidden-blog-entry/index.html index 6e5bdbc..6642abc 100644 --- a/testdata/end_to_end_test/comparison/2016/10/31/an-hidden-blog-entry/index.html +++ b/testdata/end_to_end_test/comparison/2016/10/31/an-hidden-blog-entry/index.html @@ -61,6 +61,10 @@

This Is An Hidden Article

+ + @@ -122,7 +126,7 @@

This Is An Hidden Article


Related articles that link to this one: