Skip to content

Commit

Permalink
documented ferenda.Facet and made some more tests while at it
Browse files Browse the repository at this point in the history
  • Loading branch information
staffanm committed Jul 6, 2014
1 parent f704755 commit 50f80b3
Show file tree
Hide file tree
Showing 8 changed files with 293 additions and 29 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
/ferenda.egg-info/
/htmlcov/
/netstandards
/lagen.nu
/wip
__pycache__/
/doc/_build/
11 changes: 11 additions & 0 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,3 +258,14 @@ def add_directive_header(self,sig):
ModuleLevelDocumenter.add_directive_header(self,sig)
# omit the rest
DataDocumenter.add_directive_header = add_directive_header


def skip_some_data_attributes(app, what, name, obj, skip, options):
    """Decide whether autodoc should leave a member out of the docs.

    Members named ``defaults`` that are dicts and are reported with
    ``what == "class"`` are always skipped (and a notice is printed).
    For every other member the incoming ``skip`` decision is returned
    unchanged.

    :param app: not used by this handler
    :param what: compared against the string ``"class"``
    :param name: name of the member being considered
    :param obj: the member itself
    :param skip: the decision made so far, passed through when this
                 handler has no opinion
    :param options: not used by this handler
    :returns: ``True`` for the ``defaults`` dict, otherwise ``skip``
    """
    wanted = name == "defaults" and what == "class" and isinstance(obj, dict)
    if not wanted:
        # no opinion -- keep whatever was decided before us
        return skip
    print("Skipping the attribute %r" % name)
    return True

def setup(app):
    """Hook ``skip_some_data_attributes`` up to the
    ``autodoc-skip-member`` event of ``app``.

    :param app: object providing a ``connect(event, callback)`` method
    """
    event_name = 'autodoc-skip-member'
    handler = skip_some_data_attributes
    app.connect(event_name, handler)
37 changes: 18 additions & 19 deletions doc/facets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,25 +18,16 @@ data, functions (or other callables) that sorts the data into discrete
groups, and other parameters that affect eg. the sorting order or if a
particular facet is used in a particular context.

Predefined facets and default behaviour
----------------------------------------

============= =======================
facet Description of grouping
============= =======================
rdf:type Grouped by qname of type
------------- -----------------------
dcterms:title Grouped by first letter
============= =======================


Predefined selectors
--------------------


Combining facets from different docrepos
----------------------------------------

The grouping is primarily done through a selector function. The
selector function receives a dict with some basic information about
one document, the name of the current facet (binding), and optionally
some repo-dependent extra data in the form of an RDF graph. It should
return a single string. The selector is called at least once for
every document in the docrepo, and each resulting group contains those
documents that the selector returned identical strings for. As a
simple example, a selector may group documents into years of
publication by finding the date of the ``dcterms:issued`` property and
extracting the year part of it.

Contexts where facets are used
------------------------------
Expand All @@ -58,3 +49,11 @@ two facets based on ``dcterms:publisher`` where one uses URI
references and the other uses.


Grouping a document in several groups
-------------------------------------


Combining facets from different docrepos
----------------------------------------


190 changes: 186 additions & 4 deletions ferenda/facet.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,102 @@
from ferenda import util

class Facet(object):
"""Create a facet from the given rdftype and some optional parameters.
:param rdftype: The type of facet being created
:type rdftype: rdflib.term.URIRef
:param label: TBW
:param pagetitle: TBW
:param indexingtype: TBW
:param selector: TBW
:param key: TBW
:param toplevel_only: TBW
:param use_for_toc: TBW
:param selector_descending: TBW
:param key_descending: TBW
:param multiple_values: TBW
:param dimension_type: TBW
:param dimension_label: TBW
If optional parameters aren't given, then appropriate values are
selected if rdftype is one of some common rdf properties:
=================== ======================================================
facet description
=================== ======================================================
rdf:type Grouped by :py:meth:`~rdflib.graph.Graph.qname` of the
``rdf:type`` of the document, eg. ``foaf:Document``.
Not used for toc
------------------- ------------------------------------------------------
dcterms:title Grouped by first "sortable" letter, eg for a document
titled "The Little Prince" returns "l". Is used as a
facet for the API, but it's debatable if it's useful
------------------- ------------------------------------------------------
dcterms:identifier Also grouped by first sortable letter. When indexing,
the resulting fulltext index field has a high boost
value, which increases the chances of this document
ranking high when one searches for its identifier.
------------------- ------------------------------------------------------
dcterms:abstract Not used for toc
------------------- ------------------------------------------------------
dc:creator Should be a free-text (string literal) value
------------------- ------------------------------------------------------
dcterms:publisher Should be a URIRef
------------------- ------------------------------------------------------
dcterms:references
------------------- ------------------------------------------------------
dcterms:issued Used for grouping documents published/issued in the
same year
------------------- ------------------------------------------------------
dc:subject A document can have multiple dc:subjects and all are
indexed/processed
------------------- ------------------------------------------------------
dcterms:subject Works like dc:subject, but the value should be a
URIRef
------------------- ------------------------------------------------------
schema:free A boolean value
=================== ======================================================
This class contains a number of classmethods that can be used as
arguments to ``selector`` and ``key``, eg
>>> from rdflib import Namespace
>>> MYVOCAB = Namespace("http://example.org/vocab/")
>>> f = Facet(MYVOCAB.enactmentDate, selector=Facet.year)
>>> f.selector({'myvocab_enactmentDate': '2014-07-06'},
... 'myvocab_enactmentDate')
'2014'
"""


@classmethod
def defaultselector(cls, row, binding, resource_graph=None):

"""This returns ``row[binding]`` without any transformation.
>>> row = {"rdf_type": "http://purl.org/ontology/bibo/Book",
... "dcterms_title": "A Tale of Two Cities",
... "dcterms_issued": "1859-04-30",
... "dcterms_publisher": "http://example.org/chapman_hall",
... "schema_free": "true"}
>>> Facet.defaultselector(row, "dcterms_title")
'A Tale of Two Cities'
"""
return row[binding]

@classmethod
def year(cls, row, binding='dcterms_issued', resource_graph=None):
"""This returns the year part of ``row[binding]``.
>>> row = {"rdf_type": "http://purl.org/ontology/bibo/Book",
... "dcterms_title": "A Tale of Two Cities",
... "dcterms_issued": "1859-04-30",
... "dcterms_publisher": "http://example.org/chapman_hall",
... "schema_free": "true"}
>>> Facet.year(row, "dcterms_issued")
'1859'
"""
datestring = row[binding]
# assume a date(time) like '2014-06-05T12:00:00', '2014-06-05'
# or even '2014-06'
Expand All @@ -30,21 +120,79 @@ def year(cls, row, binding='dcterms_issued', resource_graph=None):

@classmethod
def booleanvalue(cls, row, binding='schema_free', resource_graph=None):
"""
Returns True iff row[binding] == "true", False otherwise.
>>> row = {"rdf_type": "http://purl.org/ontology/bibo/Book",
... "dcterms_title": "A Tale of Two Cities",
... "dcterms_issued": "1859-04-30",
... "dcterms_publisher": "http://example.org/chapman_hall",
... "schema_free": "true"}
>>> Facet.booleanvalue(row, "schema_free")
True
"""
# only 'true' is True, everything else is False
return row[binding] == 'true'


@classmethod
def titlesortkey(cls, row, binding='dcterms_title', resource_graph=None):
    """Returns a version of ``row['dcterms_title']`` suitable for
    sorting. The function :py:func:`~ferenda.util.title_sortkey` is
    used for string transformation.

    Note that ``binding`` and ``resource_graph`` are accepted but
    ignored: the value is always read from the ``dcterms_title`` key
    of ``row``, whatever binding is passed in.

    :param row: mapping that must contain a ``dcterms_title`` key
    :param binding: ignored
    :param resource_graph: ignored
    :returns: the result of ``util.title_sortkey`` for the title
    :raises KeyError: if ``row`` has no ``dcterms_title`` key

    >>> row = {"rdf_type": "http://purl.org/ontology/bibo/Book",
    ...        "dcterms_title": "A Tale of Two Cities",
    ...        "dcterms_issued": "1859-04-30",
    ...        "dcterms_publisher": "http://example.org/chapman_hall",
    ...        "schema_free": "true"}
    >>> Facet.titlesortkey(row, "dcterms_title")
    'ataleoftwocities'
    """
    # ignore provided binding -- this key func sorts by
    # dcterms:title, period.
    # FIXME: Why was that, now again?
    title = row['dcterms_title']
    return util.title_sortkey(title)

@classmethod
def firstletter(cls, row, binding='dcterms_title', resource_graph=None):
    """Return the first character of the sort key that
    :py:meth:`titlesortkey` produces for ``row``.

    :param row: mapping of binding names to values for one document
    :param binding: forwarded to :py:meth:`titlesortkey`
    :param resource_graph: accepted but not used by this selector
    :returns: the first character of the sort key

    >>> row = {"rdf_type": "http://purl.org/ontology/bibo/Book",
    ...        "dcterms_title": "A Tale of Two Cities",
    ...        "dcterms_issued": "1859-04-30",
    ...        "dcterms_publisher": "http://example.org/chapman_hall",
    ...        "schema_free": "true"}
    >>> Facet.firstletter(row, "dcterms_title")
    'a'
    """
    sortkey = cls.titlesortkey(row, binding)
    return sortkey[0]

@classmethod
def resourcelabel(cls, row, binding='dcterms_publisher', resource_graph=None):
"""Lookup a suitable text label for row[binding] in resource_graph.
>>> row = {"rdf_type": "http://purl.org/ontology/bibo/Book",
... "dcterms_title": "A Tale of Two Cities",
... "dcterms_issued": "1859-04-30",
... "dcterms_publisher": "http://example.org/chapman_hall",
... "schema_free": "true"}
>>> import rdflib
>>> resources = rdflib.Graph().parse(format="turtle", data=\"""
... @prefix foaf: <http://xmlns.com/foaf/0.1/> .
...
... <http://example.org/chapman_hall> a foaf:Organization;
... foaf:name "Chapman & Hall" .
...
... \""")
>>> Facet.resourcelabel(row, "dcterms_publisher", resources)
'Chapman & Hall'
"""
uri = URIRef(row[binding])
for pred in (RDFS.label, SKOS.prefLabel, SKOS.altLabel, DCTERMS.title, DCTERMS.alternative, FOAF.name):
if resource_graph.value(uri, pred):
Expand All @@ -54,11 +202,46 @@ def resourcelabel(cls, row, binding='dcterms_publisher', resource_graph=None):

@classmethod
def sortresource(cls, row, binding='dcterms_publisher', resource_graph=None):
    """Returns a sortable version of the resource label for
    ``row[binding]``.

    The label is looked up with :py:meth:`resourcelabel` and then
    transformed with :py:meth:`titlesortkey`. ``row`` itself is left
    unmodified.

    :param row: mapping of binding names to values for one document
    :param binding: the key in ``row`` holding the resource URI
    :param resource_graph: graph passed on to :py:meth:`resourcelabel`
    :returns: the sort key computed from the resource label

    >>> row = {"rdf_type": "http://purl.org/ontology/bibo/Book",
    ...        "dcterms_title": "A Tale of Two Cities",
    ...        "dcterms_issued": "1859-04-30",
    ...        "dcterms_publisher": "http://example.org/chapman_hall",
    ...        "schema_free": "true"}
    >>> import rdflib
    >>> resources = rdflib.Graph().parse(format="turtle", data=\"""
    ... @prefix foaf: <http://xmlns.com/foaf/0.1/> .
    ...
    ... <http://example.org/chapman_hall> a foaf:Organization;
    ...     foaf:name "Chapman & Hall" .
    ...
    ... \""")
    >>> Facet.sortresource(row, "dcterms_publisher", resources)
    'chapmanhall'
    """
    # Keep the label in a local instead of writing it back into the
    # caller's row: overwriting row[binding] would replace the URI
    # with its label for every later use of the same row.
    label = cls.resourcelabel(row, binding, resource_graph)
    # workaround the way titlesortkey works: it always reads the
    # 'dcterms_title' key, so hand it a throwaway row carrying the label
    return cls.titlesortkey({'dcterms_title': label}, binding)

@classmethod
def qname(cls, row, binding='rdf_type', resource_graph=None):
    """Turn the URI stored in ``row[binding]`` into a qname, using
    ``resource_graph.qname`` to do the conversion.

    :param row: mapping of binding names to values for one document
    :param binding: the key in ``row`` holding the URI
    :param resource_graph: object whose ``qname`` method is called
                           with the URI; must not be ``None``
    :returns: whatever ``resource_graph.qname`` returns for the URI

    >>> row = {"rdf_type": "http://purl.org/ontology/bibo/Book",
    ...        "dcterms_title": "A Tale of Two Cities",
    ...        "dcterms_issued": "1859-04-30",
    ...        "dcterms_publisher": "http://example.org/chapman_hall",
    ...        "schema_free": "true"}
    >>> import rdflib
    >>> resources = rdflib.Graph()
    >>> resources.bind("bibo", "http://purl.org/ontology/bibo/")
    >>> Facet.qname(row, "rdf_type", resources)
    'bibo:Book'
    """
    return resource_graph.qname(URIRef(row[binding]))

Expand All @@ -84,7 +267,7 @@ def __init__(self,
dimension_type = None, # could be determined by indexingtype
dimension_label = None
):

def _finddefault(provided, rdftype, argumenttype, default):
if provided is None:
if rdftype in self.defaults and argumenttype in self.defaults[rdftype]:
Expand Down Expand Up @@ -221,4 +404,3 @@ def __eq__(self, other):
'dimension_type': 'value'
}
}

8 changes: 5 additions & 3 deletions ferenda/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def make_resources_xml(self, cssfiles, jsfiles):
)
),
E.stylesheets(*self._li_wrap(cssfiles, 'link', 'href', rel="stylesheet")),
E.javascripts(*self._li_wrap(jsfiles, 'script', 'src'))
E.javascripts(*self._li_wrap(jsfiles, 'script', 'src', text=" "))
)

if not self.config.staticsite:
Expand All @@ -140,11 +140,13 @@ def make_resources_xml(self, cssfiles, jsfiles):
# FIXME: When creating <script> elements, must take care not to
# create self-closing tags (like by creating a single space text
# node)
def _li_wrap(self, items, container, attribute, text=None, **kwargs):
    """Build one element per item, each carrying the item as an
    attribute value.

    :param items: iterable of attribute values (one element is
                  created for each)
    :param container: tag name of the elements to create
    :param attribute: name of the attribute that receives the item
    :param text: optional text content set on every element. Passing
                 a single space gives the element a text node, which
                 keeps tags such as ``<script>`` from being
                 serialized as self-closing
    :param kwargs: further attributes set on every element
    :returns: list of the created elements, in the order of ``items``
    """
    elements = []
    for item in items:
        kwargs[attribute] = item
        e = etree.Element(container, **kwargs)
        e.text = text
        # append exactly once per item
        elements.append(e)
    return elements

def _links(self, methodname):
Expand Down
Loading

0 comments on commit 50f80b3

Please sign in to comment.