diff --git a/.gitignore b/.gitignore
index 0f92cf73..141c72ef 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,7 @@
/ferenda.egg-info/
/htmlcov/
/netstandards
+/lagen.nu
/wip
__pycache__/
/doc/_build/
diff --git a/doc/conf.py b/doc/conf.py
index e7488621..cd4b90c8 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -258,3 +258,14 @@ def add_directive_header(self,sig):
ModuleLevelDocumenter.add_directive_header(self,sig)
# omit the rest
DataDocumenter.add_directive_header = add_directive_header
+
+
+def skip_some_data_attributes(app, what, name, obj, skip, options):
+ if name == "defaults" and what == "class" and isinstance(obj, dict):
+ print("Skipping the attribute %r" % name)
+ return True
+ else:
+ return skip
+
+def setup(app):
+ app.connect('autodoc-skip-member', skip_some_data_attributes)
diff --git a/doc/facets.rst b/doc/facets.rst
index 6064067c..aec3d8d9 100644
--- a/doc/facets.rst
+++ b/doc/facets.rst
@@ -18,25 +18,16 @@ data, functions (or other callables) that sorts the data into discrete
groups, and other parameters that affect eg. the sorting order or if a
particular facet is used in a particular context.
-Predefined facets and default behaviour
-----------------------------------------
-
-============= =======================
-facet Description of grouping
-============= =======================
-rdf:type Grouped by qname of type
-------------- -----------------------
-dcterms:title Grouped by first letter
-============= =======================
-
-
-Predefined selectors
---------------------
-
-
-Combining facets from different docrepos
-----------------------------------------
-
+The grouping is primarily done through a selector function. The
+selector function receives a dict with some basic information about
+one document, the name of the current facet (binding), and optionally
+some repo-dependent extra data in the form of an RDF graph. It should
+return a single string. The selector is called once (at least) for
+every document in the docrepo, and each resulting group contains those
+documents that the selector returned identical strings for. As a
+simple example, a selector may group documents into years of
+publication by finding the date of the ``dcterms:issued`` property and
+extracting the year part of it.
Contexts where facets are used
------------------------------
@@ -58,3 +49,11 @@ two facets based on ``dcterms:publisher`` where one uses URI
references and the other uses.
+Grouping a document in several groups
+-------------------------------------
+
+
+Combining facets from different docrepos
+----------------------------------------
+
+
diff --git a/ferenda/facet.py b/ferenda/facet.py
index 80284a9d..6103484d 100644
--- a/ferenda/facet.py
+++ b/ferenda/facet.py
@@ -13,12 +13,102 @@
from ferenda import util
class Facet(object):
+ """Create a facet from the given rdftype and some optional parameters.
+
+ :param rdftype: The type of facet being created
+ :type rdftype: rdflib.term.URIRef
+ :param label: TBW
+ :param pagetitle: TBW
+ :param indexingtype: TBW
+ :param selector: TBW
+ :param key: TBW
+ :param toplevel_only: TBW
+ :param use_for_toc: TBW
+ :param selector_descending: TBW
+ :param key_descending: TBW
+ :param multiple_values: TBW
+ :param dimension_type: TBW
+ :param dimension_label: TBW
+
+ If optional parameters aren't given, then appropriate values are
+ selected if rdftype is one of some common rdf properties:
+
+ =================== ======================================================
+ facet description
+ =================== ======================================================
+ rdf:type Grouped by :py:meth:`~rdflib.graph.Graph.qname` of the
+ ``rdf:type`` of the document, eg. ``foaf:Document``.
+ Not used for toc
+ ------------------- ------------------------------------------------------
+ dcterms:title Grouped by first "sortable" letter, eg for a document
+ titled "The Little Prince" returns "l". Is used as a
+ facet for the API, but it's debatable if it's useful
+ ------------------- ------------------------------------------------------
+ dcterms:identifier Also grouped by first sortable letter. When indexing,
+ the resulting fulltext index field has a high boost
+ value, which increases the chances of this document
+ ranking high when one searches for its identifier.
+ ------------------- ------------------------------------------------------
+ dcterms:abstract Not used for toc
+ ------------------- ------------------------------------------------------
+ dc:creator Should be a free-text (string literal) value
+ ------------------- ------------------------------------------------------
+ dcterms:publisher Should be a URIRef
+ ------------------- ------------------------------------------------------
+ dcterms:references
+ ------------------- ------------------------------------------------------
+ dcterms:issued Used for grouping documents published/issued in the
+ same year
+ ------------------- ------------------------------------------------------
+ dc:subject A document can have multiple dc:subjects and all are
+ indexed/processed
+ ------------------- ------------------------------------------------------
+ dcterms:subject Works like dc:subject, but the value should be a
+ URIRef
+ ------------------- ------------------------------------------------------
+ schema:free A boolean value
+ =================== ======================================================
+
+ This class contains a number of classmethods that can be used as
+ arguments to ``selector`` and ``key``, eg
+
+ >>> from rdflib import Namespace
+ >>> MYVOCAB = Namespace("http://example.org/vocab/")
+ >>> f = Facet(MYVOCAB.enactmentDate, selector=Facet.year)
+ >>> f.selector({'myvocab_enactmentDate': '2014-07-06'},
+ ... 'myvocab_enactmentDate')
+ '2014'
+
+ """
+
+
@classmethod
def defaultselector(cls, row, binding, resource_graph=None):
+
+ """This returns ``row[binding]`` without any transformation.
+
+ >>> row = {"rdf_type": "http://purl.org/ontology/bibo/Book",
+ ... "dcterms_title": "A Tale of Two Cities",
+ ... "dcterms_issued": "1859-04-30",
+ ... "dcterms_publisher": "http://example.org/chapman_hall",
+ ... "schema_free": "true"}
+ >>> Facet.defaultselector(row, "dcterms_title")
+ 'A Tale of Two Cities'
+ """
return row[binding]
@classmethod
def year(cls, row, binding='dcterms_issued', resource_graph=None):
+ """This returns the year part of ``row[binding]``.
+
+ >>> row = {"rdf_type": "http://purl.org/ontology/bibo/Book",
+ ... "dcterms_title": "A Tale of Two Cities",
+ ... "dcterms_issued": "1859-04-30",
+ ... "dcterms_publisher": "http://example.org/chapman_hall",
+ ... "schema_free": "true"}
+ >>> Facet.year(row, "dcterms_issued")
+ '1859'
+ """
datestring = row[binding]
# assume a date(time) like '2014-06-05T12:00:00', '2014-06-05'
# or even '2014-06'
@@ -30,21 +120,79 @@ def year(cls, row, binding='dcterms_issued', resource_graph=None):
@classmethod
def booleanvalue(cls, row, binding='schema_free', resource_graph=None):
+ """
+ Returns True iff row[binding] == "true", False otherwise.
+
+ >>> row = {"rdf_type": "http://purl.org/ontology/bibo/Book",
+ ... "dcterms_title": "A Tale of Two Cities",
+ ... "dcterms_issued": "1859-04-30",
+ ... "dcterms_publisher": "http://example.org/chapman_hall",
+ ... "schema_free": "true"}
+ >>> Facet.booleanvalue(row, "schema_free")
+ True
+ """
# only 'true' is True, everything else is False
return row[binding] == 'true'
+
@classmethod
def titlesortkey(cls, row, binding='dcterms_title', resource_graph=None):
- # ingnore provided binding -- this key func sorts by dcterms:title, period.
+ """Returns a version of ``row['dcterms_title']`` suitable for
+ sorting (the ``binding`` argument is ignored). The function
+ :py:func:`~ferenda.util.title_sortkey` does the transformation.
+
+ >>> row = {"rdf_type": "http://purl.org/ontology/bibo/Book",
+ ... "dcterms_title": "A Tale of Two Cities",
+ ... "dcterms_issued": "1859-04-30",
+ ... "dcterms_publisher": "http://example.org/chapman_hall",
+ ... "schema_free": "true"}
+ >>> Facet.titlesortkey(row, "dcterms_title")
+ 'ataleoftwocities'
+
+ """
+ True
+ # ignore provided binding -- this key func sorts by
+ # dcterms:title, period.
+ # FIXME: Why was that, now again?
title = row['dcterms_title']
return util.title_sortkey(title)
@classmethod
def firstletter(cls, row, binding='dcterms_title', resource_graph=None):
+ """Returns the first letter of row[binding], transformed into a
+ sortable string.
+
+ >>> row = {"rdf_type": "http://purl.org/ontology/bibo/Book",
+ ... "dcterms_title": "A Tale of Two Cities",
+ ... "dcterms_issued": "1859-04-30",
+ ... "dcterms_publisher": "http://example.org/chapman_hall",
+ ... "schema_free": "true"}
+ >>> Facet.firstletter(row, "dcterms_title")
+ 'a'
+
+ """
return cls.titlesortkey(row, binding)[0]
@classmethod
def resourcelabel(cls, row, binding='dcterms_publisher', resource_graph=None):
+ """Lookup a suitable text label for row[binding] in resource_graph.
+
+ >>> row = {"rdf_type": "http://purl.org/ontology/bibo/Book",
+ ... "dcterms_title": "A Tale of Two Cities",
+ ... "dcterms_issued": "1859-04-30",
+ ... "dcterms_publisher": "http://example.org/chapman_hall",
+ ... "schema_free": "true"}
+ >>> import rdflib
+ >>> resources = rdflib.Graph().parse(format="turtle", data=\"""
+ ... @prefix foaf: <http://xmlns.com/foaf/0.1/> .
+ ...
+ ... <http://example.org/chapman_hall> a foaf:Organization;
+ ... foaf:name "Chapman & Hall" .
+ ...
+ ... \""")
+ >>> Facet.resourcelabel(row, "dcterms_publisher", resources)
+ 'Chapman & Hall'
+ """
uri = URIRef(row[binding])
for pred in (RDFS.label, SKOS.prefLabel, SKOS.altLabel, DCTERMS.title, DCTERMS.alternative, FOAF.name):
if resource_graph.value(uri, pred):
@@ -54,11 +202,46 @@ def resourcelabel(cls, row, binding='dcterms_publisher', resource_graph=None):
@classmethod
def sortresource(cls, row, binding='dcterms_publisher', resource_graph=None):
+ """Returns a sortable version of the resource label for
+ ``row[binding]``.
+
+ >>> row = {"rdf_type": "http://purl.org/ontology/bibo/Book",
+ ... "dcterms_title": "A Tale of Two Cities",
+ ... "dcterms_issued": "1859-04-30",
+ ... "dcterms_publisher": "http://example.org/chapman_hall",
+ ... "schema_free": "true"}
+ >>> import rdflib
+ >>> resources = rdflib.Graph().parse(format="turtle", data=\"""
+ ... @prefix foaf: <http://xmlns.com/foaf/0.1/> .
+ ...
+ ... <http://example.org/chapman_hall> a foaf:Organization;
+ ... foaf:name "Chapman & Hall" .
+ ...
+ ... \""")
+ >>> Facet.sortresource(row, "dcterms_publisher", resources)
+ 'chapmanhall'
+ """
row[binding] = cls.resourcelabel(row, binding, resource_graph)
- return cls.titlesortkey(row, binding)
+ # workaround the way titlesortkey works
+ return cls.titlesortkey({'dcterms_title': row[binding]}, binding)
@classmethod
def qname(cls, row, binding='rdf_type', resource_graph=None):
+ """Returns the qname of the rdf URIref contained in row[binding], as
+ determined by the namespace prefixes registered in
+ resource_graph.
+
+ >>> row = {"rdf_type": "http://purl.org/ontology/bibo/Book",
+ ... "dcterms_title": "A Tale of Two Cities",
+ ... "dcterms_issued": "1859-04-30",
+ ... "dcterms_publisher": "http://example.org/chapman_hall",
+ ... "schema_free": "true"}
+ >>> import rdflib
+ >>> resources = rdflib.Graph()
+ >>> resources.bind("bibo", "http://purl.org/ontology/bibo/")
+ >>> Facet.qname(row, "rdf_type", resources)
+ 'bibo:Book'
+ """
u = URIRef(row[binding])
return resource_graph.qname(u)
@@ -84,7 +267,7 @@ def __init__(self,
dimension_type = None, # could be determined by indexingtype
dimension_label = None
):
-
+
def _finddefault(provided, rdftype, argumenttype, default):
if provided is None:
if rdftype in self.defaults and argumenttype in self.defaults[rdftype]:
@@ -221,4 +404,3 @@ def __eq__(self, other):
'dimension_type': 'value'
}
}
-
diff --git a/ferenda/resources.py b/ferenda/resources.py
index 1e21027e..ba2d457e 100644
--- a/ferenda/resources.py
+++ b/ferenda/resources.py
@@ -114,7 +114,7 @@ def make_resources_xml(self, cssfiles, jsfiles):
)
),
E.stylesheets(*self._li_wrap(cssfiles, 'link', 'href', rel="stylesheet")),
- E.javascripts(*self._li_wrap(jsfiles, 'script', 'src'))
+ E.javascripts(*self._li_wrap(jsfiles, 'script', 'src', text=" "))
)
if not self.config.staticsite:
@@ -140,11 +140,13 @@ def make_resources_xml(self, cssfiles, jsfiles):
# FIXME: When creating