lektor · dairiki · Feb 11, 2022 · Mar 19, 2020 · Mar 22, 2020 · Mar 22, 2020
diff --git a/AUTHORS b/AUTHORS
@@ -0,0 +1,7 @@
+Original author: A. Jesse Jiryu Davis
+
+Contributors:
+
+- Joseph Nix (release, bug fixes)
+- Jakob Schnitzer (release, bug fixes)
+- Louis Paternault (tag weights)
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,8 @@
+Copyright 2016 A. Jesse Jiryu Davis
+Copyright 2018 Joseph Nix
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/README.md b/README.md
@@ -233,3 +233,89 @@ tags = ["tag1", "tag2"]
 See [the Lektor documentation for queries](https://www.getlektor.com/docs/api/db/query/).
 
 Tags are always deduplicated. Tags are sorted in the order listed in the contents.lr / admin, allowing you to control their order manually. Since `{{ tags }}` simply returns a list, you can always apply any Jinja2 filter on that list such as sort, slice, or rejectattr.
+
+## Tag cloud & tag weights
+
+This plugin won't automatically build a tag cloud, but it provides the tools to build it.
+
+The Jinja2 context has a `tagweights()` function, which returns a dictionary mapping each tag to a weight object. That object exposes the weight through several attributes and functions; they are listed below, with examples of how they can be used in a template.
+
+Unused tags are ignored.
+
+### TL;DR Which weight function should I use?
+
+- To get the number of pages tagged by each tag, use `count`.
+- To map tags to numbers, use `log(lower, upper)`.
+- To map tags to everything else, use `loggroup(list)`.
+
+### `count` — Number of pages tagged with this tag
+
+This is the basic weight, used as the base for the weight functions described below.
+
+#### Example: Tags (with tag count) sorted by tag count (most used first)
+
+```jinja
+<ul>
+{% for tag, weight in (tagweights() | dictsort(by='value', reverse=true)) %}
+    <li>{{ tag }} ({{ weight.count }} articles).</li>
+{% endfor %}
+</ul>
+```
+
+### `linear` — Tags are mapped with a number between `lower` and `upper`.
+
+The least used tag is mapped to `lower`, the most used tag is mapped to `upper` (`lower` and `upper` can be equal, and `upper` can be smaller than `lower`).
+
+Mapping is done using a linear function.
+
+The result is a float: you might want to convert it to an integer first (see the example for `log`).
+
+Unless you know what you are doing, you should use `log` instead.
+
+### `log` — Logarithm of tag counts are mapped with a number between `lower` and `upper`.
+
+The least used tag is mapped to `lower`, the most used tag is mapped to `upper` (`lower` and `upper` can be equal, and `upper` can be smaller than `lower`).
+
+Mapping is done using a linear function over the logarithm of tag counts.
+
+The result is a float: you might want to convert it to an integer first (see the example below).
+
+#### Example: Most used tag is twice as big as least used tag
+
+```jinja
+{% for tag, weight in tagweights()|dictsort %}
+<a
+    href="{{ ('/blog@tag/' ~ tag)|url }}"
+    style="font-size: {{ weight.log(100, 200)|round|int }}%;"
+    >
+        {{ tag }}
+    </a>
+{% endfor %}
+```
+
+### `lineargroup` — Map each tag with an item of the list given in argument
+
+The least used tag is mapped to the first item, the most used tag is mapped to the last item.
+
+Mapping is done using a linear function.
+
+Unless you know what you are doing, you should use `loggroup` instead.
+
+### `loggroup` — Logarithm of tag counts are mapped with an item of the list given in argument
+
+The least used tag is mapped to the first item, the most used tag is mapped to the last item.
+
+Mapping is done using a linear function over the logarithm of tag counts.
+
+#### Example: Tags are given CSS classes `tagcloud-tiny`, `tagcloud-small`, etc.
+
+```jinja
+{% for tag, weight in tagweights()|dictsort %}
+<a
+    href="{{ ('/blog@tag/' ~ tag)|url }}"
+    class="tagcloud-{{ weight.loggroup(["tiny", "small", "normal", "big", "large"]) }}"
+    >
+        {{ tag }}
+    </a>
+{% endfor %}
+```
diff --git a/lektor_tags.py b/lektor_tags.py
@@ -1,8 +1,14 @@
 # -*- coding: utf-8 -*-
+import collections
+import contextlib
 import posixpath
+from dataclasses import dataclass
+from functools import total_ordering
+from math import log
 
 import pkg_resources
 from lektor.build_programs import BuildProgram
+from lektor.context import get_ctx
 from lektor.environment import Expression
 from lektor.environment import FormatExpression
 from lektor.pluginsystem import Plugin
@@ -64,6 +70,84 @@ def build_artifact(self, artifact):
         artifact.render_template_into(self.source.template_name, this=self.source)
 
 
+@total_ordering
+@dataclass
+class TagWeight:
+    """Weight of one tag, relative to the least and most used tags.
+
+    Comparisons (``==``, ``<`` and the orderings derived by
+    ``total_ordering``) look at ``count`` only, so weights sort by tag
+    usage; ``mincount`` and ``maxcount`` do not take part in comparisons.
+    """
+
+    # Count recorded for this tag.
+    count: int
+    # Smallest count among all tags.
+    mincount: int
+    # Largest count among all tags.
+    maxcount: int
+
+    def __lt__(self, other):
+        """Order by ``count``; return ``NotImplemented`` for other types."""
+        if isinstance(other, self.__class__):
+            return self.count < other.count
+        return NotImplemented
+
+    def __eq__(self, other):
+        """Compare by ``count``; return ``NotImplemented`` for other types."""
+        if isinstance(other, self.__class__):
+            return self.count == other.count
+        return NotImplemented
+
+    def linear(self, lower, upper):
+        """Map tag to a number between `lower` and `upper`.
+
+        The least used tag is mapped to `lower`, the most used tag is mapped
+        to `upper`. Mapping is done using a linear function.
+
+        When all tags have the same count, `lower` is returned unchanged.
+        """
+        if self.mincount == self.maxcount:
+            # Degenerate range: avoid dividing by zero.
+            return lower
+        return lower + (upper - lower) * (self.count - self.mincount) / (
+            self.maxcount - self.mincount
+        )
+
+    def lineargroup(self, groups):
+        """Map tag to an item of the sequence `groups`.
+
+        The least used tag is mapped to the first item, the most used tag is
+        mapped to the last item. Mapping is done using a linear function,
+        rounded to the nearest index.
+        """
+        # round() on a number already returns an int in Python 3.
+        return groups[round(self.linear(0, len(groups) - 1))]
+
+    def log(self, lower, upper):
+        """Map tag to a number between `lower` and `upper`.
+
+        The least used tag is mapped to `lower`, the most used tag is mapped
+        to `upper`. Mapping is done using a linear function over the
+        logarithm of tag counts.
+
+        When all tags have the same count, `lower` is returned unchanged.
+
+        Theorem: The base of the logarithm used in this function is irrelevant.
+
+        Proof (idea of):
+            Let t0 and t1 be the tag counts of the least and most used tag,
+            a and b the `lower` and `upper` arguments of this function, and l
+            the base of the logarithm used in this function. Let t be the tag
+            count of an arbitrary tag.
+            To what number is t mapped?
+
+            Let f be the linear function such that f(log(t0)/log(l))=a and
+            f(log(t1)/log(l))=b.
+
+            The expression of this function is:
+            f(x) = ((b-a)×log(l)×x+a×log(t1)-b×log(t0))/(log(t1)-log(t0)).
+
+            Thus, the arbitrary tag t is mapped to f(log(t)/log(l)), and
+            the `log(l)` is crossed out and `l` disappears: the number `l`
+            is irrelevant. What remains is
+            a+(b-a)×log(t/t0)/log(t1/t0), which is what is computed below.
+        """
+        if self.mincount == self.maxcount:
+            # Degenerate range: log(maxcount / mincount) would be zero.
+            return lower
+        return lower + (upper - lower) * log(self.count / self.mincount) / log(
+            self.maxcount / self.mincount
+        )
+
+    def loggroup(self, groups):
+        """Map tag to an item of the sequence `groups`.
+
+        The least used tag is mapped to the first item, the most used tag is
+        mapped to the last item. Mapping is done using a linear function over
+        the logarithm of tag counts, rounded to the nearest index.
+        """
+        # round() on a number already returns an int in Python 3.
+        return groups[round(self.log(0, len(groups) - 1))]
+
+
 class TagsPlugin(Plugin):
     name = u"Tags"
     description = u"Lektor plugin to add tags."
@@ -74,6 +158,7 @@ class TagsPlugin(Plugin):
     def on_setup_env(self, **extra):
         pkg_dir = pkg_resources.resource_filename("lektor_tags", "templates")
         self.env.jinja_env.loader.searchpath.append(pkg_dir)
+        self.env.jinja_env.globals["tagweights"] = self.tagweights
         self.env.add_build_program(TagPage, TagPageBuildProgram)
 
         @self.env.urlresolver
@@ -150,3 +235,32 @@ def get_all_tags(self, parent):
 
     def ignore_missing(self):
         return bool_from_string(self.get_config().get("ignore_missing"), False)
+
+    def tagcount(self):
+        """Map each tag to the number of pages tagged with it.
+
+        Returns a ``collections.Counter`` keyed by tag. A tag listed several
+        times on the same page is counted once for that page.
+
+        The pad is taken from ``get_ctx()``, so a Lektor context must be
+        active when this is called.
+        """
+        # Count tags, to be aggregated as "tag weights". Note that tags that
+        # only appear in non-discoverable pages are ignored.
+        field = self.get_tag_field_name()
+        tagcount = collections.Counter()
+        for page in get_ctx().pad.query(self.get_parent_path()):
+            # Pages where the lookup raises KeyError, or whose value cannot be
+            # iterated/hashed (TypeError), contribute nothing.
+            with contextlib.suppress(KeyError, TypeError):
+                # dict.fromkeys() drops duplicates while keeping the order in
+                # which the tags are listed on the page.
+                tagcount.update(list(dict.fromkeys(page[field])))
+        return tagcount
+
+    def tagweights(self):
+        """Return the dictionary of tag weights.
+
+        That is:
+            - keys are tags (strings);
+            - values are TagWeight objects.
+
+        An empty dictionary is returned when the tag counts sum to zero
+        (in particular, when there are no tags at all).
+
+        This function is to be called AFTER the build has started
+        (so that ``get_ctx()`` returns something).
+        """
+        tagcount = self.tagcount()
+        counts = tagcount.values()
+        if sum(counts) == 0:
+            return {}
+
+        # The extremes are the same for every tag: compute them once instead
+        # of once per tag.
+        mincount = min(counts)
+        maxcount = max(counts)
+        return {
+            tag: TagWeight(count, mincount, maxcount)
+            for tag, count in tagcount.items()
+        }
diff --git a/setup.cfg b/setup.cfg
@@ -21,6 +21,8 @@ include_package_data = True
 setup_requires =
     setuptools >= 45
     setuptools_scm >= 6
+install_requires =
+    dataclasses;python_version<"3.7"
 
 [options.entry_points]
 lektor.plugins =

diff --git a/tests/test_tagweights.py b/tests/test_tagweights.py
@@ -0,0 +1,96 @@
+from collections import Counter
+
+import pytest
+from lektor.context import Context
+
+from lektor_tags import TagWeight
+
+
+@pytest.fixture
+def tags_plugin(env):
+    """Provide the plugin registered under the ``"tags"`` key of ``env.plugins``."""
+    plugins = env.plugins
+    return plugins["tags"]
+
+
+@pytest.fixture
+def lektor_context(pad):
+    """Enter a ``Context`` built on ``pad`` and yield it to the test.
+
+    The ``with`` block stays open while the test runs and is left when the
+    fixture is torn down.
+    """
+    with Context(pad=pad) as ctx:
+        yield ctx
+
+
+@pytest.mark.usefixtures("lektor_context")
+def test_tagcount(tags_plugin):
+    """``tagcount()`` returns the expected per-tag counts."""
+    expected = Counter({"tag1": 2, "tag2": 1, "tag3": 1})
+    actual = tags_plugin.tagcount()
+    assert actual == expected
+
+
+@pytest.mark.usefixtures("lektor_context")
+def test_tagweights(tags_plugin):
+    """``tagweights()`` maps each tag to a TagWeight with the right fields."""
+    weights = tags_plugin.tagweights()
+    assert all(isinstance(weight, TagWeight) for weight in weights.values())
+    # TagWeight equality only looks at ``count``, so comparing TagWeight
+    # objects would not notice a wrong ``mincount`` or ``maxcount``.
+    # Compare every field explicitly instead.
+    assert {
+        tag: (weight.count, weight.mincount, weight.maxcount)
+        for tag, weight in weights.items()
+    } == {
+        "tag1": (2, 1, 2),
+        "tag2": (1, 1, 2),
+        "tag3": (1, 1, 2),
+    }
+
+
+@pytest.mark.usefixtures("lektor_context")
+def test_tagweights_no_tags(pad, tags_plugin):
+    """``tagweights()`` is empty once ``tags_field`` is set to ``test_no_tags``."""
+    # Redirect the plugin to the ``test_no_tags`` field before querying.
+    tags_plugin.get_config()["tags_field"] = "test_no_tags"
+    weights = tags_plugin.tagweights()
+    assert weights == {}
+
+
+@pytest.fixture
+def tagweight(count, mincount, maxcount):
+    """Build a ``TagWeight`` from the parametrized count values."""
+    return TagWeight(count=count, mincount=mincount, maxcount=maxcount)
+
+
+@pytest.mark.parametrize(
+    "count, mincount, maxcount, lower, upper, expected",
+    [
+        # All tags share the same count: ``lower`` is returned.
+        (1, 1, 1, 1, 2, 1),
+        # Least used, middle and most used tag of a 1..3 range.
+        (1, 1, 3, 1, 2, 1),
+        (2, 1, 3, 1, 2, 1.5),
+        (3, 1, 3, 1, 2, 2),
+    ],
+)
+def test_TagWeight_linear(tagweight, lower, upper, expected):
+    """``linear()`` interpolates between ``lower`` and ``upper``."""
+    result = tagweight.linear(lower, upper)
+    assert result == expected
+
+
+@pytest.mark.parametrize(
+    "count, mincount, maxcount, groups, expected",
+    [
+        # Lower half of the 1..4 range lands in the first group.
+        (1, 1, 4, ("a", "b"), "a"),
+        (2, 1, 4, ("a", "b"), "a"),
+        # Upper half lands in the last group.
+        (3, 1, 4, ("a", "b"), "b"),
+        (4, 1, 4, ("a", "b"), "b"),
+    ],
+)
+def test_TagWeight_lineargroup(tagweight, groups, expected):
+    """``lineargroup()`` picks the group matching the linear weight."""
+    chosen = tagweight.lineargroup(groups)
+    assert chosen == expected
+
+
+@pytest.mark.parametrize(
+    "count, mincount, maxcount, lower, upper, expected",
+    [
+        # All tags share the same count: ``lower`` is returned.
+        (1, 1, 1, 1, 3, 1),
+        # Least used, middle (on a log scale) and most used tag of a 1..4 range.
+        (1, 1, 4, 1, 3, 1),
+        (2, 1, 4, 1, 3, 2),
+        (4, 1, 4, 1, 3, 3),
+    ],
+)
+def test_TagWeight_log(tagweight, lower, upper, expected):
+    """``log()`` interpolates between ``lower`` and ``upper`` on a log scale."""
+    # The result goes through math.log and a float division, so compare with
+    # a tolerance rather than requiring bit-exact equality.
+    assert tagweight.log(lower, upper) == pytest.approx(expected)
+
+
+@pytest.mark.parametrize(
+    "count, mincount, maxcount, groups, expected",
+    [
+        # Small counts fall in the first group.
+        (1, 1, 100, ("a", "b", "c"), "a"),
+        (3, 1, 100, ("a", "b", "c"), "a"),
+        # Mid-range count (on a log scale) falls in the middle group.
+        (12, 1, 100, ("a", "b", "c"), "b"),
+        # Large counts fall in the last group.
+        (90, 1, 100, ("a", "b", "c"), "c"),
+        (100, 1, 100, ("a", "b", "c"), "c"),
+    ],
+)
+def test_TagWeight_loggroup(tagweight, groups, expected):
+    """``loggroup()`` picks the group matching the logarithmic weight."""
+    chosen = tagweight.loggroup(groups)
+    assert chosen == expected