diff --git a/.gitignore b/.gitignore index 0f92cf73..141c72ef 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ /ferenda.egg-info/ /htmlcov/ /netstandards +/lagen.nu /wip __pycache__/ /doc/_build/ diff --git a/doc/conf.py b/doc/conf.py index e7488621..cd4b90c8 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -258,3 +258,14 @@ def add_directive_header(self,sig): ModuleLevelDocumenter.add_directive_header(self,sig) # omit the rest DataDocumenter.add_directive_header = add_directive_header + + +def skip_some_data_attributes(app, what, name, obj, skip, options): + if name == "defaults" and what == "class" and isinstance(obj, dict): + print("Skipping the attribute %r" % name) + return True + else: + return skip + +def setup(app): + app.connect('autodoc-skip-member', skip_some_data_attributes) diff --git a/doc/facets.rst b/doc/facets.rst index 6064067c..aec3d8d9 100644 --- a/doc/facets.rst +++ b/doc/facets.rst @@ -18,25 +18,16 @@ data, functions (or other callables) that sorts the data into discrete groups, and other parameters that affect eg. the sorting order or if a particular facet is used in a particular context. -Predefined facets and default behaviour ----------------------------------------- - -============= ======================= -facet Description of grouping -============= ======================= -rdf:type Grouped by qname of type -------------- ----------------------- -dcterms:title Grouped by first letter -============= ======================= - - -Predefined selectors --------------------- - - -Combining facets from different docrepos ----------------------------------------- - +The grouping is primarily done through a selector function. The +selector function receives a dict with some basic information about +one document, the name of the current facet (binding), and optionally +some repo-dependent extra data in the form of an RDF graph. It should +return a single string. 
The selector is called once (at least) for +every document in the docrepo, and each resulting group contains those +documents that the selector returned identical strings for. As a +simple example, a selector may group documents into years of +publication by finding the date of the ``dcterms:issued`` property and +extracting the year part of it. Contexts where facets are used ------------------------------ @@ -58,3 +49,11 @@ two facets based on ``dcterms:publisher`` where one uses URI references and the other uses. +Grouping a document in several groups +------------------------------------- + + +Combining facets from different docrepos +---------------------------------------- + + diff --git a/ferenda/facet.py b/ferenda/facet.py index 80284a9d..6103484d 100644 --- a/ferenda/facet.py +++ b/ferenda/facet.py @@ -13,12 +13,102 @@ from ferenda import util class Facet(object): + """Create a facet from the given rdftype and some optional parameters. + + :param rdftype: The type of facet being created + :type rdftype: rdflib.term.URIRef + :param label: TBW + :param pagetitle: TBW + :param indexingtype: TBW + :param selector: TBW + :param key: TBW + :param toplevel_only: TBW + :param use_for_toc: TBW + :param selector_descending: TBW + :param key_descending: TBW + :param multiple_values: TBW + :param dimension_type: TBW + :param dimension_label: TBW + + If optional parameters aren't given, then appropriate values are + selected if rdftype is one of some common rdf properties: + + =================== ====================================================== + facet description + =================== ====================================================== + rdf:type Grouped by :py:meth:`~rdflib.graph.Graph.qname` of the + ``rdf:type`` of the document, eg. ``foaf:Document``. + Not used for toc + ------------------- ------------------------------------------------------ + dcterms:title Grouped by first "sortable" letter, eg for a document + titled "The Little Prince" returns "l". 
Is used as a + facet for the API, but it's debatable if it's useful + ------------------- ------------------------------------------------------ + dcterms:identifier Also grouped by first sortable letter. When indexing, + the resulting fulltext index field has a high boost + value, which increases the chances of this document + ranking high when one searches for its identifier. + ------------------- ------------------------------------------------------ + dcterms:abstract Not used for toc + ------------------- ------------------------------------------------------ + dc:creator Should be a free-text (string literal) value + ------------------- ------------------------------------------------------ + dcterms:publisher Should be a URIRef + ------------------- ------------------------------------------------------ + dcterms:references + ------------------- ------------------------------------------------------ + dcterms:issued Used for grouping documents published/issued in the + same year + ------------------- ------------------------------------------------------ + dc:subject A document can have multiple dc:subjects and all are + indexed/processed + ------------------- ------------------------------------------------------ + dcterms:subject Works like dc:subject, but the value should be a + URIRef + ------------------- ------------------------------------------------------ + schema:free A boolean value + =================== ====================================================== + + This class contains a number of classmethods that can be used as + arguments to ``selector`` and ``key``, eg + + >>> from rdflib import Namespace + >>> MYVOCAB = Namespace("http://example.org/vocab/") + >>> f = Facet(MYVOCAB.enactmentDate, selector=Facet.year) + >>> f.selector({'myvocab_enactmentDate': '2014-07-06'}, + ... 
'myvocab_enactmentDate') + '2014' + + """ + + @classmethod def defaultselector(cls, row, binding, resource_graph=None): + + """This returns ``row[binding]`` without any transformation. + + >>> row = {"rdf_type": "http://purl.org/ontology/bibo/Book", + ... "dcterms_title": "A Tale of Two Cities", + ... "dcterms_issued": "1859-04-30", + ... "dcterms_publisher": "http://example.org/chapman_hall", + ... "schema_free": "true"} + >>> Facet.defaultselector(row, "dcterms_title") + 'A Tale of Two Cities' + """ return row[binding] @classmethod def year(cls, row, binding='dcterms_issued', resource_graph=None): + """This returns the year part of ``row[binding]``. + + >>> row = {"rdf_type": "http://purl.org/ontology/bibo/Book", + ... "dcterms_title": "A Tale of Two Cities", + ... "dcterms_issued": "1859-04-30", + ... "dcterms_publisher": "http://example.org/chapman_hall", + ... "schema_free": "true"} + >>> Facet.year(row, "dcterms_issued") + '1859' + """ datestring = row[binding] # assume a date(time) like '2014-06-05T12:00:00', '2014-06-05' # or even '2014-06' @@ -30,21 +120,79 @@ def year(cls, row, binding='dcterms_issued', resource_graph=None): @classmethod def booleanvalue(cls, row, binding='schema_free', resource_graph=None): + """ + Returns True iff row[binding] == "true", False otherwise. + + >>> row = {"rdf_type": "http://purl.org/ontology/bibo/Book", + ... "dcterms_title": "A Tale of Two Cities", + ... "dcterms_issued": "1859-04-30", + ... "dcterms_publisher": "http://example.org/chapman_hall", + ... "schema_free": "true"} + >>> Facet.booleanvalue(row, "schema_free") + True + """ # only 'true' is True, everything else is False return row[binding] == 'true' + @classmethod def titlesortkey(cls, row, binding='dcterms_title', resource_graph=None): - # ingnore provided binding -- this key func sorts by dcterms:title, period. + """Returns a version of row[binding] suitable for sorting. 
The + function :py:func:`~ferenda.util.title_sortkey` is used for + string transformation. + + >>> row = {"rdf_type": "http://purl.org/ontology/bibo/Book", + ... "dcterms_title": "A Tale of Two Cities", + ... "dcterms_issued": "1859-04-30", + ... "dcterms_publisher": "http://example.org/chapman_hall", + ... "schema_free": "true"} + >>> Facet.titlesortkey(row, "dcterms_title") + 'ataleoftwocities' + + """ + True + # ignore provided binding -- this key func sorts by + # dcterms:title, period. + # FIXME: Why was that, now again? title = row['dcterms_title'] return util.title_sortkey(title) @classmethod def firstletter(cls, row, binding='dcterms_title', resource_graph=None): + """Returns the first letter of row[binding], transformed into a + sortable string. + + >>> row = {"rdf_type": "http://purl.org/ontology/bibo/Book", + ... "dcterms_title": "A Tale of Two Cities", + ... "dcterms_issued": "1859-04-30", + ... "dcterms_publisher": "http://example.org/chapman_hall", + ... "schema_free": "true"} + >>> Facet.firstletter(row, "dcterms_title") + 'a' + + """ return cls.titlesortkey(row, binding)[0] @classmethod def resourcelabel(cls, row, binding='dcterms_publisher', resource_graph=None): + """Lookup a suitable text label for row[binding] in resource_graph. + + >>> row = {"rdf_type": "http://purl.org/ontology/bibo/Book", + ... "dcterms_title": "A Tale of Two Cities", + ... "dcterms_issued": "1859-04-30", + ... "dcterms_publisher": "http://example.org/chapman_hall", + ... "schema_free": "true"} + >>> import rdflib + >>> resources = rdflib.Graph().parse(format="turtle", data=\""" + ... @prefix foaf: . + ... + ... a foaf:Organization; + ... foaf:name "Chapman & Hall" . + ... + ... 
\""") + >>> Facet.resourcelabel(row, "dcterms_publisher", resources) + 'Chapman & Hall' + """ uri = URIRef(row[binding]) for pred in (RDFS.label, SKOS.prefLabel, SKOS.altLabel, DCTERMS.title, DCTERMS.alternative, FOAF.name): if resource_graph.value(uri, pred): @@ -54,11 +202,46 @@ def resourcelabel(cls, row, binding='dcterms_publisher', resource_graph=None): @classmethod def sortresource(cls, row, binding='dcterms_publisher', resource_graph=None): + """Returns a sortable version of the resource label for + ``row[binding]``. + + >>> row = {"rdf_type": "http://purl.org/ontology/bibo/Book", + ... "dcterms_title": "A Tale of Two Cities", + ... "dcterms_issued": "1859-04-30", + ... "dcterms_publisher": "http://example.org/chapman_hall", + ... "schema_free": "true"} + >>> import rdflib + >>> resources = rdflib.Graph().parse(format="turtle", data=\""" + ... @prefix foaf: . + ... + ... a foaf:Organization; + ... foaf:name "Chapman & Hall" . + ... + ... \""") + >>> Facet.sortresource(row, "dcterms_publisher", resources) + 'chapmanhall' + """ row[binding] = cls.resourcelabel(row, binding, resource_graph) - return cls.titlesortkey(row, binding) + # workaround the way titlesortkey works + return cls.titlesortkey({'dcterms_title': row[binding]}, binding) @classmethod def qname(cls, row, binding='rdf_type', resource_graph=None): + """Returns the qname of the rdf URIref contained in row[binding], as + determined by the namespace prefixes registered in + resource_graph. + + >>> row = {"rdf_type": "http://purl.org/ontology/bibo/Book", + ... "dcterms_title": "A Tale of Two Cities", + ... "dcterms_issued": "1859-04-30", + ... "dcterms_publisher": "http://example.org/chapman_hall", + ... 
"schema_free": "true"} + >>> import rdflib + >>> resources = rdflib.Graph() + >>> resources.bind("bibo", "http://purl.org/ontology/bibo/") + >>> Facet.qname(row, "rdf_type", resources) + 'bibo:Book' + """ u = URIRef(row[binding]) return resource_graph.qname(u) @@ -84,7 +267,7 @@ def __init__(self, dimension_type = None, # could be determined by indexingtype dimension_label = None ): - + def _finddefault(provided, rdftype, argumenttype, default): if provided is None: if rdftype in self.defaults and argumenttype in self.defaults[rdftype]: @@ -221,4 +404,3 @@ def __eq__(self, other): 'dimension_type': 'value' } } - diff --git a/ferenda/resources.py b/ferenda/resources.py index 1e21027e..ba2d457e 100644 --- a/ferenda/resources.py +++ b/ferenda/resources.py @@ -114,7 +114,7 @@ def make_resources_xml(self, cssfiles, jsfiles): ) ), E.stylesheets(*self._li_wrap(cssfiles, 'link', 'href', rel="stylesheet")), - E.javascripts(*self._li_wrap(jsfiles, 'script', 'src')) + E.javascripts(*self._li_wrap(jsfiles, 'script', 'src', text=" ")) ) if not self.config.staticsite: @@ -140,11 +140,13 @@ def make_resources_xml(self, cssfiles, jsfiles): # FIXME: When creating