Skip to content

Commit

Permalink
Reading time estimations, fixing #47
Browse files Browse the repository at this point in the history
  • Loading branch information
novoid committed Oct 3, 2020
1 parent 81b1233 commit 9244a7f
Show file tree
Hide file tree
Showing 46 changed files with 552 additions and 33 deletions.
2 changes: 2 additions & 0 deletions README.org
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,8 @@ efficiently:
the entry page, navigational pages, and the feeds. This way, you
can publish pages which can only be accessed by people knowing their URL.

- Reading time estimations (multi-language) following [[https://github.com/novoid/lazyblorg/issues/47][this feature request]]

** FAQs

See https://github.com/novoid/lazyblorg/wiki/FAQs
Expand Down
81 changes: 80 additions & 1 deletion lib/htmlizer.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
# -*- coding: utf-8; mode: python; -*-
# Time-stamp: <2020-10-03 16:27:56 vk>
# Time-stamp: <2020-10-03 16:45:42 vk>

import config # lazyblorg-global settings
import sys
import logging
import os
from datetime import datetime
from time import time, localtime, strftime
from math import ceil # for calculating reading time
import re # RegEx: for parsing/sanitizing
import codecs
from lib.utils import Utils # for guess_language_from_stopword_percentages()
Expand Down Expand Up @@ -313,6 +314,28 @@ def _populate_dict_of_tags_with_ids(self, blog_data):

return dict_of_tags_with_ids

def _derive_reading_length(self, rawcontent: str) -> int:
"""
Determines the number of minutes reading time from the rawcontent of the article.
Assumption: people are able to read 250 words per minute.
See https://github.com/novoid/lazyblorg/issues/47 for the idea and implementation notes.
"""

# remove heading title and drawer in order to get body of content:
rawcontent_without_header: str = re.sub(r':PROPERTIES:.+?:END:\n', '', rawcontent, flags=re.DOTALL)

# remove all "words" (according to split()) which contains numbers or other characters that are indicators of non-word elements:
raw_words: list = [x for x in rawcontent_without_header.split() if not re.match(r'.*[|0123456789].*', x)]
raw_words = [x for x in raw_words if not x.startswith(('#+', '-', ':'))]

minutes: int = ceil(len(raw_words) / 250)

if minutes == 0:
minutes = 1 # even empty articles should take one minute to watch at
return minutes

def _generate_pages_for_tags_persistent_temporal(self, tags):
"""
Method that creates the pages for tag-pages, persistent pages, and temporal pages.
Expand Down Expand Up @@ -348,6 +371,10 @@ def _generate_pages_for_tags_persistent_temporal(self, tags):

entry = self.sanitize_and_htmlize_blog_content(entry)

# populate reading time indicator:
if 'rawcontent' in entry.keys():
entry['reading_minutes'] = self._derive_reading_length(entry['rawcontent'])

htmlcontent = None

if entry['category'] == config.TAGS:
Expand Down Expand Up @@ -475,6 +502,7 @@ def _generate_tag_page(self, entry):
htmlcontent += self._replace_general_article_placeholders(
entry, content)
htmlcontent = self.sanitize_internal_links(htmlcontent)
htmlcontent = self._insert_reading_minutes_if_found(entry, htmlcontent)

return htmlfilename, orgfilename, htmlcontent

Expand Down Expand Up @@ -1967,6 +1995,53 @@ def _generate_temporal_article(self, entry):
htmlcontent += self._replace_general_article_placeholders(
entry, content)
htmlcontent = self.sanitize_internal_links(htmlcontent)
htmlcontent = self._insert_reading_minutes_if_found(entry, htmlcontent)

return htmlfilename, orgfilename, htmlcontent

def _insert_reading_minutes_if_found(self, entry, htmlcontent):
"""
Handles the snippet that contains the estimation for the reading minutes.
Deletes the snippet of the template if none found.
"""
content = ''
if 'reading_minutes' in entry.keys():
if '#READING-MINUTES-SECTION#' in htmlcontent:
# insert snippet
snippetname = 'reading-time-'

# handle one or many minutes: (I do have different snippets for those cases)
if entry['reading_minutes'] == 1:
snippetname += 'one-minute-'
else:
snippetname += 'multiple-minutes-'

# handle different languages:
if entry['autotags']['language'] == 'deutsch':
# FIXXME: other languages than german have to be added
# here: (generalize using a configured list of known
# languages?)
snippetname += 'de'
else:
snippetname += 'en'

# insert snippet:
content = htmlcontent.replace('#READING-MINUTES-SECTION#', self.template_definition_by_name(snippetname))
# replace actual minutes (if found):
content = self._replace_general_article_placeholders(entry, content)
return content
else:
# remove template snippet because we've got no minutes to insert
# NOTE: Should be dead code
logging.warning('Entry %s: missing reading minutes, removing snippet' % entry['id'])
return htmlcontent.replace('#READING-MINUTES-SECTION#', '')
else:
# missing reading minutes should only be OK with
# auto-generated tag pages. Report error if otherwise:
if not entry['id'].startswith(self.ID_PREFIX_FOR_EMPTY_TAG_PAGES):
logging.warning('Entry %s: missing reading minutes in "entry[]"' % entry['id'])
return htmlcontent


return htmlfilename, orgfilename, htmlcontent

Expand Down Expand Up @@ -2018,6 +2093,7 @@ def _generate_persistent_article(self, entry):
htmlcontent += self._replace_general_article_placeholders(
entry, content)
htmlcontent = self.sanitize_internal_links(htmlcontent)
htmlcontent = self._insert_reading_minutes_if_found(entry, htmlcontent)

return htmlfilename, orgfilename, htmlcontent

Expand Down Expand Up @@ -2158,6 +2234,9 @@ def _replace_general_article_placeholders(self, entry, template):
self._generate_tag_page_list(
entry['title']))

if 'reading_minutes' in entry.keys():
content = content.replace('#READINGMINUTES#', str(entry['reading_minutes']))

return content

def _generate_top_tag_list(self):
Expand Down
36 changes: 36 additions & 0 deletions templates/blog-format.org
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,36 @@ With name/label:

#+END_EXPORT

*** Reading time indicators

#+NAME: reading-time-one-minute-en
#+BEGIN_HTML
<aside class="reading-time-section">
Reading time is one minute
</aside>
#+END_HTML

#+NAME: reading-time-one-minute-de
#+BEGIN_HTML
<aside class="reading-time-section">
Lesezeit ist eine Minute
</aside>
#+END_HTML

#+NAME: reading-time-multiple-minutes-en
#+BEGIN_HTML
<aside class="reading-time-section">
Reading time is #READINGMINUTES# minutes
</aside>
#+END_HTML

#+NAME: reading-time-multiple-minutes-de
#+BEGIN_HTML
<aside class="reading-time-section">
Lesezeit ist #READINGMINUTES# Minuten
</aside>
#+END_HTML

*** ignore me

#+NAME:
Expand Down Expand Up @@ -542,6 +572,8 @@ With name/label:

<h1 class="common-article-header-title">#ARTICLE-TITLE#</h1>

#READING-MINUTES-SECTION#

</header>

#COMMON-SIDEBAR#
Expand Down Expand Up @@ -730,6 +762,8 @@ With name/label:

<h1 class="common-article-header-title">#ARTICLE-TITLE#</h1>

#READING-MINUTES-SECTION#

</header>

#COMMON-SIDEBAR#
Expand Down Expand Up @@ -1029,6 +1063,8 @@ Pages that describe a tag. Corresponding Org-mode entries must have:

<h1 class="common-article-header-title">Tag Page for the Tag "#ARTICLE-TITLE#"</h1>

#READING-MINUTES-SECTION#

</header>

#COMMON-SIDEBAR#
Expand Down
19 changes: 14 additions & 5 deletions templates/public_voit.css

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions templates/public_voit.scss
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,17 @@ figcaption {
font-size: 80%;
}

/* Reading time estimation; the class is used by the
   <aside class="reading-time-section"> template snippets. */
.reading-time-section {
/* reuse the rules of .middle_section_wide (presumably defined elsewhere in this stylesheet) */
@extend .middle_section_wide;
/* rendered smaller than the surrounding text, gray and right-aligned */
font-size: 80%;
color: gray;
text-align: right;
/* automatic horizontal margins on both sides */
margin-left: auto;
margin-right: auto;
/* padding-top: 0.2em;*/
/* keeps the right-aligned text away from the right edge */
padding-right: 15em;
}

.published-on {
@extend .middle_section_wide;
font-size: 80%;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@

<h1 class="common-article-header-title">old blog entry1</h1>

<aside class="reading-time-section">
Reading time is one minute
</aside>

</header>


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@

<h1 class="common-article-header-title">old blog entry10</h1>

<aside class="reading-time-section">
Reading time is one minute
</aside>

</header>


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@

<h1 class="common-article-header-title">old blog entry11</h1>

<aside class="reading-time-section">
Reading time is one minute
</aside>

</header>


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@

<h1 class="common-article-header-title">old blog entry12</h1>

<aside class="reading-time-section">
Reading time is one minute
</aside>

</header>


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@

<h1 class="common-article-header-title">old blog entry13</h1>

<aside class="reading-time-section">
Reading time is one minute
</aside>

</header>


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@

<h1 class="common-article-header-title">old blog entry14</h1>

<aside class="reading-time-section">
Reading time is one minute
</aside>

</header>


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@

<h1 class="common-article-header-title">old blog entry2</h1>

<aside class="reading-time-section">
Reading time is one minute
</aside>

</header>


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@

<h1 class="common-article-header-title">old blog entry3</h1>

<aside class="reading-time-section">
Reading time is one minute
</aside>

</header>


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@

<h1 class="common-article-header-title">old blog entry4</h1>

<aside class="reading-time-section">
Reading time is one minute
</aside>

</header>


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@

<h1 class="common-article-header-title">old blog entry5</h1>

<aside class="reading-time-section">
Reading time is one minute
</aside>

</header>


Expand Down

0 comments on commit 9244a7f

Please sign in to comment.