diff --git a/ferenda/compat.py b/ferenda/compat.py
index e6207b86..a29e6073 100644
--- a/ferenda/compat.py
+++ b/ferenda/compat.py
@@ -21,6 +21,6 @@
import unittest
try:
- from unittest.mock import Mock, patch, call
+ from unittest.mock import Mock, MagicMock, patch, call
except ImportError: # pragma: no cover
- from mock import Mock, patch, call
+ from mock import Mock, MagicMock, patch, call
diff --git a/ferenda/compositerepository.py b/ferenda/compositerepository.py
index 57c56236..98573dac 100644
--- a/ferenda/compositerepository.py
+++ b/ferenda/compositerepository.py
@@ -3,29 +3,34 @@
import os
-from . import DocumentRepository, DocumentStore
-
+from ferenda import DocumentRepository, DocumentStore
+from ferenda import util, errors
class CompositeStore(DocumentStore):
- def __init__(self, datadir, downloaded_suffix=".html", storage_policy="file", docrepos=[]):
+ def __init__(self, datadir, downloaded_suffix=".html",
+ storage_policy="file",
+ docrepo_instances=None):
self.datadir = datadir # docrepo.datadir + docrepo.alias
self.downloaded_suffix = downloaded_suffix
self.storage_policy = storage_policy
- self.docrepos = docrepos
+ if not docrepo_instances:
+ docrepo_instances = {}
+ self.docrepo_instances = docrepo_instances
def list_basefiles_for(self, action, basedir=None):
if not basedir:
basedir = self.datadir
if action == "parse":
documents = set()
- for inst in self.docrepos:
+ # assert self.docrepo_instances, "No docrepos are defined!"
+ for cls, inst in self.docrepo_instances.items():
for basefile in inst.store.list_basefiles_for("parse"):
if basefile not in documents:
documents.add(basefile)
yield basefile
else:
- for basefile in inst.store.list_basefiles_for(action):
+ for basefile in super(CompositeStore, self).list_basefiles_for(action):
yield basefile
@@ -54,58 +59,61 @@ def __init__(self, **kwargs):
self.store = self.documentstore_class(self.config.datadir + os.sep + self.alias,
downloaded_suffix=self.downloaded_suffix,
storage_policy=self.storage_policy,
- docrepos=self._instances)
+ docrepo_instances=self._instances)
def download(self):
for c in self.subrepos:
inst = self.get_instance(c, self.myoptions)
+ # make sure that our store has access to our now
+ # initialized subrepo objects
+ if c not in self.store.docrepo_instances:
+ self.store.docrepo_instances[c] = inst
inst.download()
# NOTE: this impl should NOT use the @managedparsing decorator
def parse(self, basefile):
- start = time()
- self.log.debug("%s: Starting", basefile)
- ret = False
- for c in self.subrepos:
- inst = self.get_instance(c, self.myoptions)
- try:
- # each parse method should be smart about whether to re-parse
- # or not (i.e. use the @managedparsing decorator)
- ret = inst.parse(basefile)
- except errors.ParseError: # or others
- ret = False
+ with util.logtime(self.log.info, "%(basefile)s OK (%(elapsed).3f sec)",
+ {'basefile': basefile}):
+ ret = False
+ for c in self.subrepos:
+ inst = self.get_instance(c, self.myoptions)
+ try:
+ # each parse method should be smart about whether to re-parse
+ # or not (i.e. use the @managedparsing decorator)
+ ret = inst.parse(basefile)
+ except errors.ParseError: # or others
+ ret = False
+ if ret:
+ break
if ret:
- break
- if ret:
- self.copy_parsed(basefile, inst)
+ self.copy_parsed(basefile, inst)
+ return ret
def copy_parsed(self, basefile, instance):
# If the distilled and parsed links are recent, assume that
# all external resources are OK as well
- if (util.outfile_is_newer([instance.distilled_path(basefile)],
- self.distilled_path(basefile)) and
- util.outfile_is_newer([instance.parsed_path(basefile)],
- self.parsed_path(basefile))):
- self.log.debug(
- "%s: External resources are (probably) up-to-date" % basefile)
+ if (util.outfile_is_newer([instance.store.distilled_path(basefile)],
+ self.store.distilled_path(basefile)) and
+ util.outfile_is_newer([instance.store.parsed_path(basefile)],
+ self.store.parsed_path(basefile))):
+ self.log.debug("%s: Attachments are (likely) up-to-date" % basefile)
return
+ util.link_or_copy(instance.store.distilled_path(basefile),
+ self.store.distilled_path(basefile))
+
+ util.link_or_copy(instance.store.parsed_path(basefile),
+ self.store.parsed_path(basefile))
+
cnt = 0
- for attachment in instance.store.list_attachments(doc.basefile, "parsed"):
+ for attachment in instance.store.list_attachments(basefile, "parsed"):
cnt += 1
- src = instance.store.parser_path(basename, attachment=attachment)
- target = self.store.parsed_path(basename, attachment=attachment)
+ src = instance.store.parsed_path(basefile, attachment=attachment)
+ target = self.store.parsed_path(basefile, attachment=attachment)
util.link_or_copy(src, target)
-
- util.link_or_copy(instance.distilled_path(basefile),
- self.distilled_path(basefile))
-
- util.link_or_copy(instance.parsed_path(basefile),
- self.parsed_path(basefile))
-
if cnt:
- self.log.debug("%s: Linked %s external resources from %s to %s" %
+ self.log.debug("%s: Linked %s attachments from %s to %s" %
(basefile,
cnt,
- os.path.dirname(instance.parsed_path(basefile)),
- os.path.dirname(self.parsed_path(basefile))))
+ os.path.dirname(instance.store.parsed_path(basefile)),
+ os.path.dirname(self.store.parsed_path(basefile))))
diff --git a/ferenda/decorators.py b/ferenda/decorators.py
index 69888c92..eb8c8cfe 100644
--- a/ferenda/decorators.py
+++ b/ferenda/decorators.py
@@ -76,8 +76,16 @@ def wrapper(self, doc):
def render(f):
"""Handles the serialization of the :py:class:`~ferenda.Document`
-object to XHTML+RDFa and RDF/XML files. Must be used in conjunction
-with :py:func:`~ferenda.decorators.makedocument`."""
+ object to XHTML+RDFa and RDF/XML files. Must be used in
+ conjunction with :py:func:`~ferenda.decorators.makedocument`.
+
+ """
+ # NOTE: The actual rendering is two lines of code. The bulk of
+ # this function validates that the XHTML+RDFa file that we end up
+ # with contains the exact same triples as is present in the doc
+ # object (including both the doc.meta Graph and any other Graph
+ # that might be present on any doc.body object)
+
def iterate_graphs(node):
res = []
if hasattr(node, 'meta') and node.meta is not None:
@@ -97,12 +105,15 @@ def wrapper(self, doc):
# css file + background images + png renderings of text
self.create_external_resources(doc)
- # Check to see that all metadata contained in doc.meta is
- # present in the serialized file.
+ # Validate that all triples specified in doc.meta and any
+ # .meta property on any body object is present in the
+ # XHTML+RDFa file.
distilled_graph = Graph()
- with codecs.open(self.store.parsed_path(doc.basefile), encoding="utf-8") as fp: # unicode
- distilled_graph.parse(data=fp.read(), format="rdfa", publicID=doc.uri)
+ with codecs.open(self.store.parsed_path(doc.basefile),
+ encoding="utf-8") as fp: # unicode
+ distilled_graph.parse(data=fp.read(), format="rdfa",
+ publicID=doc.uri)
# The act of parsing from RDFa binds a lot of namespaces
# in the graph in an unneccesary manner. Particularly it
# binds both 'dc' and 'dcterms' to
@@ -110,15 +121,18 @@ def wrapper(self, doc):
# less than predictable. Blow these prefixes away.
distilled_graph.bind("dc", URIRef("http://purl.org/dc/elements/1.1/"))
distilled_graph.bind(
- "dcterms", URIRef("http://example.org/this-prefix-should-not-be-used"))
+ "dcterms",
+ URIRef("http://example.org/this-prefix-should-not-be-used"))
util.ensure_dir(self.store.distilled_path(doc.basefile))
- with open(self.store.distilled_path(doc.basefile), "wb") as distilled_file:
+ with open(self.store.distilled_path(doc.basefile),
+ "wb") as distilled_file:
# print("============distilled===============")
# print(distilled_graph.serialize(format="turtle").decode('utf-8'))
distilled_graph.serialize(distilled_file, format="pretty-xml")
self.log.debug(
- '%s: %s triples extracted to %s', doc.basefile, len(distilled_graph), self.store.distilled_path(doc.basefile))
+ '%s: %s triples extracted to %s', doc.basefile,
+ len(distilled_graph), self.store.distilled_path(doc.basefile))
for g in iterate_graphs(doc.body):
doc.meta += g
diff --git a/ferenda/describer.py b/ferenda/describer.py
index 8eed9989..96b9301f 100644
--- a/ferenda/describer.py
+++ b/ferenda/describer.py
@@ -77,9 +77,9 @@ def getvalue(self, p):
"""
values = list(self.getvalues(p))
if len(values) == 0:
- raise KeyError("No objects for predicate %s" % p)
+ raise KeyError("No values for predicate %s" % p)
elif len(values) > 1:
- raise KeyError("More than one object for predicatee %s" % p)
+ raise KeyError("More than one value for predicate %s" % p)
return values[0]
def getrel(self, p):
@@ -94,7 +94,7 @@ def getrel(self, p):
"""
refs = list(self.getrels(p))
if len(refs) == 0:
- raise KeyError("No objects for predicate %s" + p)
+ raise KeyError("No objects for predicate %s" % p)
elif len(refs) > 1:
- raise KeyError("More than one object for predicatee %s" + p)
+ raise KeyError("More than one object for predicate %s" % p)
return refs[0]
diff --git a/ferenda/devel.py b/ferenda/devel.py
index 2c951263..b9bb449b 100644
--- a/ferenda/devel.py
+++ b/ferenda/devel.py
@@ -29,22 +29,6 @@ class Devel(object):
"""
alias = "devel"
- # FIXME: manager.py should not strictly require these to be present
-
- class DummyStore(object):
-
- def __init__(self, path, **kwargs):
- pass
-
- def list_basefiles_for(self, action, basedir=None):
- return []
- downloaded_suffix = ".html"
- storage_policy = "file"
- documentstore_class = DummyStore
-
- # Don't document this -- just needed for ferenda.manager compatibility
- def get_default_options(self):
- return {}
@decorators.action
def dumprdf(self, filename, format="turtle"):
@@ -309,34 +293,52 @@ def select(self, template, uri, format="json"):
p['triples'] = len(res)
print(res.serialize(format=format).decode('utf-8'))
+
+ # FIXME: These are dummy implementations of methods and class
+ # variables that manager.py expects all docrepos to have. We don't
+ # want to have coverage counting these as missing lines, hence the
+ # pragma: no cover comments.
+
+ class DummyStore(object):
+
+ def __init__(self, path, **kwargs):
+ pass # pragma: no cover
+
+ def list_basefiles_for(self, action, basedir=None):
+ return [] # pragma: no cover
+
+ documentstore_class = DummyStore
+ downloaded_suffix = ".html"
+ storage_policy = "file"
+
+ def get_default_options(self):
+ return {} # pragma: no cover
+
def download(self):
- pass
+ pass # pragma: no cover
def parse(self, basefile):
- pass
+ pass # pragma: no cover
def relate(self, basefile):
- pass
+ pass # pragma: no cover
def generate(self, basefile):
- pass
+ pass # pragma: no cover
def toc(self, otherrepos):
- pass
+ pass # pragma: no cover
def news(self, otherrepos):
- pass
+ pass # pragma: no cover
def status(self):
- pass
-
- def list_basefiles_for(self, command):
- return []
+ pass # pragma: no cover
@classmethod
def setup(cls, action, config):
- pass
+ pass # pragma: no cover
@classmethod
def teardown(cls, action, config):
- pass
+ pass # pragma: no cover
diff --git a/ferenda/documentstore.py b/ferenda/documentstore.py
index 3248292c..ecbd41ad 100644
--- a/ferenda/documentstore.py
+++ b/ferenda/documentstore.py
@@ -217,7 +217,10 @@ def list_basefiles_for(self, action, basedir=None):
suffix = ".rdf"
elif action == "generate":
directory = os.path.sep.join((basedir, "parsed"))
- suffix = ".xhtml"
+ if self.storage_policy == "dir":
+ suffix = "index.xhtml"
+ else:
+ suffix = ".xhtml"
elif action == "news":
directory = os.path.sep.join((basedir, "entries"))
suffix = ".json"
diff --git a/ferenda/sources/general/wiki.py b/ferenda/sources/general/wiki.py
index 93f5e615..a3607454 100644
--- a/ferenda/sources/general/wiki.py
+++ b/ferenda/sources/general/wiki.py
@@ -13,7 +13,7 @@
# mine
from ferenda import DocumentRepository
from ferenda import util
-from ferenda.legalref import LegalRef, Link
+# from ferenda.legalref import LegalRef, Link
# FIXME: Need to dynamically set this namespace (by inspecting the root?)
# as it varies with MW version
diff --git a/ferenda/sources/legal/eu/eurlexcaselaw.py b/ferenda/sources/legal/eu/eurlexcaselaw.py
index 223e4ef9..cb7f3cf8 100644
--- a/ferenda/sources/legal/eu/eurlexcaselaw.py
+++ b/ferenda/sources/legal/eu/eurlexcaselaw.py
@@ -7,7 +7,7 @@
from rdflib import Graph
from ferenda import DocumentRepository
-from ferenda.legalref import LegalRef
+from ferenda.sources.legal.se.legalref import LegalRef
from ferenda.elements import Paragraph
# FIXME: 2008.json, containing a handful of cases, some which should not be fetched, and one continuation link.
diff --git a/ferenda/sources/legal/se/dv.py b/ferenda/sources/legal/se/dv.py
index cf79f5f9..a6d22325 100755
--- a/ferenda/sources/legal/se/dv.py
+++ b/ferenda/sources/legal/se/dv.py
@@ -25,7 +25,7 @@
from ferenda import DocumentStore, Describer, WordReader
from ferenda.decorators import managedparsing
from ferenda import util
-from ferenda.legalref import LegalRef, Link
+from ferenda.sources.legal.se.legalref import LegalRef, Link
from ferenda.elements import Body, Paragraph
from . import SwedishLegalSource, RPUBL
diff --git a/ferenda/sources/legal/se/jk.py b/ferenda/sources/legal/se/jk.py
index 032695e9..10f5af8a 100644
--- a/ferenda/sources/legal/se/jk.py
+++ b/ferenda/sources/legal/se/jk.py
@@ -16,7 +16,7 @@
from .swedishlegalsource import Stycke, Sektion
from ferenda.decorators import downloadmax, recordlastdownload
from ferenda import util
-from ferenda.legalref import LegalRef, Link
+from ferenda.sources.legal.se.legalref import LegalRef, Link
class JK(SwedishLegalSource):
diff --git a/ferenda/legalref.py b/ferenda/sources/legal/se/legalref.py
similarity index 98%
rename from ferenda/legalref.py
rename to ferenda/sources/legal/se/legalref.py
index fb9ead53..6fc0bb26 100755
--- a/ferenda/legalref.py
+++ b/ferenda/sources/legal/se/legalref.py
@@ -12,7 +12,7 @@
# 3rdparty libs
# needed early
-from . import util
+from ferenda import util
external_simpleparse_state = None
try:
@@ -131,8 +131,8 @@ def tag(text, tagtable, sliceleft, sliceright):
# my own libraries
-from .elements import Link
-from .elements import LinkSubject
+from ferenda.elements import Link
+from ferenda.elements import LinkSubject
# The charset used for the bytestrings that is sent to/from
# simpleparse (which does not handle unicode)
@@ -243,26 +243,19 @@ def __init__(self, *args):
else:
scriptdir = os.path.dirname(__file__)
- #n3file = os.path.sep.join([scriptdir,"etc","sfs-extra.n3"])
- #n3url = "file://" + n3file.replace("\\","/")
-
- # print "scriptdir: %s" % scriptdir
- # print "n3file: %s" % n3file
- # print "n3url: %s" % n3url
-
self.graph = Graph()
- n3file = os.path.relpath(scriptdir + "/res/etc/sfs-extra.n3")
+ n3file = os.path.relpath(scriptdir + "/../../../res/etc/sfs-extra.n3")
# print "loading n3file %s" % n3file
self.graph.load(n3file, format="n3")
self.roots = []
self.uriformatter = {}
self.decl = "" # try to make it unicode clean all the way
self.namedlaws = {}
- self.load_ebnf(scriptdir + "/res/etc/base.ebnf")
+ self.load_ebnf(scriptdir + "/../../../res/etc/base.ebnf")
self.args = args
if self.LAGRUM in args:
- productions = self.load_ebnf(scriptdir + "/res/etc/lagrum.ebnf")
+ productions = self.load_ebnf(scriptdir + "/../../../res/etc/lagrum.ebnf")
for p in productions:
self.uriformatter[p] = self.sfs_format_uri
self.namedlaws.update(self.get_relations(RDFS.label))
@@ -274,10 +267,10 @@ def __init__(self, *args):
# nu, eftersom kortlagrum.ebnf beror på produktioner som
# definerats där
if not self.LAGRUM in args:
- self.load_ebnf(scriptdir + "/res/etc/lagrum.ebnf")
+ self.load_ebnf(scriptdir + "/../../../res/etc/lagrum.ebnf")
productions = self.load_ebnf(
- scriptdir + "/res/etc/kortlagrum.ebnf")
+ scriptdir + "/../../../res/etc/kortlagrum.ebnf")
for p in productions:
self.uriformatter[p] = self.sfs_format_uri
DCT = Namespace("http://purl.org/dc/terms/")
@@ -294,23 +287,23 @@ def __init__(self, *args):
self.roots.insert(0, "kortlagrumref")
if self.EGLAGSTIFTNING in args:
- productions = self.load_ebnf(scriptdir + "/res/etc/eglag.ebnf")
+ productions = self.load_ebnf(scriptdir + "/../../../res/etc/eglag.ebnf")
for p in productions:
self.uriformatter[p] = self.eglag_format_uri
self.roots.append("eglagref")
if self.FORARBETEN in args:
productions = self.load_ebnf(
- scriptdir + "/res/etc/forarbeten.ebnf")
+ scriptdir + "/../../../res/etc/forarbeten.ebnf")
for p in productions:
self.uriformatter[p] = self.forarbete_format_uri
self.roots.append("forarbeteref")
if self.RATTSFALL in args:
- productions = self.load_ebnf(scriptdir + "/res/etc/rattsfall.ebnf")
+ productions = self.load_ebnf(scriptdir + "/../../../res/etc/rattsfall.ebnf")
for p in productions:
self.uriformatter[p] = self.rattsfall_format_uri
self.roots.append("rattsfallref")
if self.EGRATTSFALL in args:
- productions = self.load_ebnf(scriptdir + "/res/etc/egratt.ebnf")
+ productions = self.load_ebnf(scriptdir + "/../../../res/etc/egratt.ebnf")
for p in productions:
self.uriformatter[p] = self.egrattsfall_format_uri
self.roots.append("ecjcaseref")
diff --git a/ferenda/legaluri.py b/ferenda/sources/legal/se/legaluri.py
similarity index 98%
rename from ferenda/legaluri.py
rename to ferenda/sources/legal/se/legaluri.py
index 336454f5..afde9b1a 100644
--- a/ferenda/legaluri.py
+++ b/ferenda/sources/legal/se/legaluri.py
@@ -18,8 +18,8 @@
# my own libraries
-from .legalref import LegalRef
-from . import util
+from ferenda.sources.legal.se.legalref import LegalRef
+from ferenda import util
RPUBL = Namespace('http://rinfo.lagrummet.se/ns/2008/11/rinfo/publ#')
RINFOEX = Namespace("http://lagen.nu/terms#")
diff --git a/ferenda/sources/legal/se/myndfskr.py b/ferenda/sources/legal/se/myndfskr.py
index af471cfc..c5e09cd1 100644
--- a/ferenda/sources/legal/se/myndfskr.py
+++ b/ferenda/sources/legal/se/myndfskr.py
@@ -14,7 +14,7 @@
import six
from ferenda import TextReader
-from ferenda.legalref import LegalRef
+from ferenda.sources.legal.se.legalref import LegalRef
from ferenda import util
from . import SwedishLegalSource
diff --git a/ferenda/sources/legal/se/sfs.py b/ferenda/sources/legal/se/sfs.py
index e8e1e1f5..c075222a 100755
--- a/ferenda/sources/legal/se/sfs.py
+++ b/ferenda/sources/legal/se/sfs.py
@@ -35,14 +35,14 @@
from ferenda import DocumentEntry, DocumentStore
from ferenda import TextReader, Describer
from ferenda import decorators
-from ferenda import legaluri
+from ferenda.sources.legal.se import legaluri
from ferenda import util, LayeredConfig
from ferenda.elements import CompoundElement
from ferenda.elements import OrdinalElement
from ferenda.elements import TemporalElement
from ferenda.elements import UnicodeElement
from ferenda.errors import DocumentRemovedError, ParseError
-from ferenda.legalref import LegalRef, LinkSubject
+from ferenda.sources.legal.se.legalref import LegalRef, LinkSubject
E = ElementMaker(namespace="http://www.w3.org/1999/xhtml")
# Objektmodellen för en författning är uppbyggd av massa byggstenar
diff --git a/ferenda/util.py b/ferenda/util.py
index cf51b24d..f8076e33 100755
--- a/ferenda/util.py
+++ b/ferenda/util.py
@@ -365,7 +365,7 @@ def link_or_copy(src, dst):
# The semantics of symlink are not identical to copy. The
# source must be relative to the dstination, not relative to
# cwd at creation time.
- relsrc = os.relpath(src, os.path.dirname(dst))
+ relsrc = os.path.relpath(src, os.path.dirname(dst))
os.symlink(relsrc, dst)
else:
copy_if_different(src, dst)
diff --git a/test/testCompositeRepo.py b/test/testCompositeRepo.py
index eea6905f..97e9bd64 100644
--- a/test/testCompositeRepo.py
+++ b/test/testCompositeRepo.py
@@ -4,25 +4,52 @@
import sys, os
if os.getcwd() not in sys.path: sys.path.insert(0,os.getcwd())
-from ferenda.testutil import RepoTester, DocumentRepository, util
-from ferenda.compat import unittest
+from ferenda import DocumentRepository, util, errors
+from ferenda.testutil import RepoTester
#SUT
from ferenda import CompositeRepository
class SubrepoA(DocumentRepository):
+ storage_policy = "dir"
alias= "a"
def download(self, basefile=None):
util.writefile(self.store.downloaded_path("1"), "basefile 1, repo a")
+ def parse(self, basefile):
+ if basefile == "1":
+ util.writefile(self.store.parsed_path("1"),
+ "basefile 1, parsed by a")
+ util.writefile(self.store.parsed_path("1", attachment="extra.txt"),
+ "attachment for basefile 1, parsed by a")
+ util.writefile(self.store.distilled_path("1"),
+ "basefile 1, metadata from a")
+ return True
+ else:
+ return False # we don't even have this basefile
+
class SubrepoB(DocumentRepository):
+ storage_policy = "dir"
alias= "b"
def download(self, basefile=None):
util.writefile(self.store.downloaded_path("1"), "basefile 1, repo b")
util.writefile(self.store.downloaded_path("2"), "basefile 2, repo b")
+ def parse(self, basefile):
+ if basefile == "1":
+ util.writefile(self.store.parsed_path("1"),
+ "basefile 1, parsed by b")
+ util.writefile(self.store.parsed_path("1", attachment="attach.txt"),
+ "attachment for basefile 1, parsed by b")
+ util.writefile(self.store.distilled_path("1"),
+ "basefile 1, metadata from b")
+ return True
+ else:
+ raise errors.ParseError("No can do!")
+
class CompositeExample(CompositeRepository):
subrepos = SubrepoB, SubrepoA
+ storage_policy = "dir"
class TestComposite(RepoTester):
repoclass = CompositeExample
@@ -30,18 +57,40 @@ class TestComposite(RepoTester):
def test_download(self):
self.repo.download()
self.assertEqual("basefile 1, repo a",
- util.readfile(self.datadir+"/a/downloaded/1.html"))
+ util.readfile(self.datadir+"/a/downloaded/1/index.html"))
self.assertEqual("basefile 1, repo b",
- util.readfile(self.datadir+"/b/downloaded/1.html"))
+ util.readfile(self.datadir+"/b/downloaded/1/index.html"))
self.assertEqual("basefile 2, repo b",
- util.readfile(self.datadir+"/b/downloaded/2.html"))
+ util.readfile(self.datadir+"/b/downloaded/2/index.html"))
- @unittest.expectedFailure
def test_list_basefiles_for(self):
self.repo.download()
# This doesn't work since self.repo.store.docrepos has
# uninitialized classes, not objects
- self.assertEqual(["1", "2"],
- list(self.repo.store.list_basefiles_for("parse")))
-
+ self.assertEqual(set(["2", "1"]),
+ set(self.repo.store.list_basefiles_for("parse")))
+
+ def test_parse(self):
+ # we already know list_basefiles_for("parse") will return ["2", "1"]
+ self.assertTrue(self.repo.parse("1")) # both A and B can handle this
+ # but B should win
+ self.assertEqual("basefile 1, parsed by b",
+ util.readfile(self.repo.store.parsed_path("1")))
+ self.assertEqual("basefile 1, metadata from b",
+ util.readfile(self.repo.store.distilled_path("1")))
+        self.assertEqual(["attach.txt"],
+                         self.repo.store.list_attachments("1", "parsed"))
+ self.assertFalse(self.repo.parse("2")) # none can handle this
+
+ # in this case, all files should be up-to-date, so no copying
+ # should occur (triggering the "Attachments are (likely)
+ # up-to-date branch")
+ self.assertTrue(self.repo.parse("1"))
+
+ # and finally, list_basefiles_for("generate") should delegate
+ # to DocumentStore.list_basefiles_for
+ self.assertEqual(set(["1"]),
+ set(self.repo.store.list_basefiles_for("generate")))
+
+
diff --git a/test/testDecorators.py b/test/testDecorators.py
index 11c71b8f..392a39d8 100644
--- a/test/testDecorators.py
+++ b/test/testDecorators.py
@@ -1,20 +1,14 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
-import sys, os
-from ferenda.compat import unittest
+import sys, os, datetime
+from ferenda.compat import unittest, Mock, MagicMock, patch
if os.getcwd() not in sys.path: sys.path.insert(0,os.getcwd())
-try:
- # assume we're on py3.3 and fall back if not
- from unittest.mock import Mock, MagicMock, patch
-except ImportError:
- from mock import Mock, MagicMock, patch
-
from ferenda import DocumentRepository, Document
from ferenda.errors import DocumentRemovedError, ParseError
# SUT
-from ferenda.decorators import timed, parseifneeded, render, handleerror, makedocument
+from ferenda.decorators import timed, parseifneeded, render, handleerror, makedocument, recordlastdownload, downloadmax
class Decorators(unittest.TestCase):
@@ -99,8 +93,10 @@ def testfunc(repo,doc):
mockrepo.store.distilled_path.return_value = "distilled_path.xhtml"
mockrepo.get_globals.return_value = {'symbol table':'fake'}
- mockdoc.meta = MagicMock()
- mockdoc.body = []
+ mockdoc.meta = MagicMock() # need Magicmock which supports magic funcs like __iter__
+ bodypart = MagicMock()
+ bodypart.meta = MagicMock()
+ mockdoc.body = [bodypart]
mockdoc.meta.__iter__.return_value = []
mockdoc.uri = "http://example.org/doc"
with patch('ferenda.util.ensure_dir', return_value=True):
@@ -192,3 +188,36 @@ def testfunc(repo,doc):
doc = testfunc(DocumentRepository(),"base/file")
self.assertIsInstance(doc,Document)
self.assertEqual(doc.basefile, "base/file")
+
+ def test_recordlastdownload(self):
+ @recordlastdownload
+ def testfunc(repo):
+ pass
+ mockrepo = Mock()
+ with patch('ferenda.decorators.LayeredConfig.write') as mockconf:
+ testfunc(mockrepo)
+ # check that config.lastdownload has been set to a datetime
+ self.assertIsInstance(mockrepo.config.lastdownload,
+ datetime.datetime)
+ # and that LayeredConfig.write has been called
+ self.assertTrue(mockconf.called)
+
+ def test_downloadmax(self):
+ @downloadmax
+ def testfunc(repo, source):
+ for x in range(100):
+ yield x
+ mockrepo = Mock()
+ mockrepo.config.downloadmax = None
+ self.assertEqual(100, len(list(testfunc(mockrepo, None))))
+
+ os.environ["FERENDA_DOWNLOADMAX"] = "10"
+ self.assertEqual(10, len(list(testfunc(mockrepo, None))))
+
+ del os.environ["FERENDA_DOWNLOADMAX"]
+ mockrepo.config.downloadmax = 20
+ self.assertEqual(20, len(list(testfunc(mockrepo, None))))
+
+
+
+
diff --git a/test/testDescriber.py b/test/testDescriber.py
new file mode 100644
index 00000000..38b3940c
--- /dev/null
+++ b/test/testDescriber.py
@@ -0,0 +1,71 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import datetime
+
+from ferenda.compat import unittest
+
+from rdflib import Graph, Namespace
+
+# SUT
+from ferenda import Describer
+DCT = Namespace("http://purl.org/dc/terms/")
+FOAF = Namespace("http://xmlns.com/foaf/0.1/")
+
+class TestDescriber(unittest.TestCase):
+ def setUp(self):
+ self.graph = Graph()
+        self.graph.parse(data="""
+@prefix dct: <http://purl.org/dc/terms/> .
+@prefix foaf: <http://xmlns.com/foaf/0.1/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+<http://example.org/doc> a foaf:Document;
+        dct:title "Hello world"@en ;
+        dct:identifier "ID1",
+                "ID2";
+        dct:issued "2013-10-11"^^xsd:date;
+        dct:references <http://example.org/doc2>;
+        dct:subject <http://example.org/concept1>,
+                <http://example.org/concept2> .
+        """, format="turtle")
+ self.desc = Describer(self.graph, "http://example.org/doc")
+
+ def test_getvalues(self):
+ self.assertEqual(self.desc.getvalues(DCT.alternate),
+ [])
+ self.assertEqual(self.desc.getvalues(DCT.title),
+ ["Hello world"])
+ self.assertEqual(set(self.desc.getvalues(DCT.identifier)),
+ set(["ID1", "ID2"]))
+
+ def test_getvalue(self):
+ self.assertEqual(self.desc.getvalue(DCT.title),
+ "Hello world")
+ self.assertEqual(self.desc.getvalue(DCT.issued),
+ datetime.date(2013,10,11))
+ with self.assertRaises(KeyError):
+ self.desc.getvalue(DCT.alternate)
+ with self.assertRaises(KeyError):
+ self.desc.getvalue(DCT.identifier)
+
+ def test_getrels(self):
+ self.assertEqual(self.desc.getrels(DCT.replaces),
+ [])
+ self.assertEqual(self.desc.getrels(DCT.references),
+ ["http://example.org/doc2"])
+ self.assertEqual(set(self.desc.getrels(DCT.subject)),
+ set(["http://example.org/concept1",
+ "http://example.org/concept2"]))
+
+ def test_getrel(self):
+ self.assertEqual(self.desc.getrel(DCT.references),
+ "http://example.org/doc2")
+ with self.assertRaises(KeyError):
+ self.desc.getrel(DCT.replaces)
+ with self.assertRaises(KeyError):
+ self.desc.getrel(DCT.subject)
+
+ def test_getrdftype(self):
+ self.assertEqual(self.desc.getrdftype(),
+ "http://xmlns.com/foaf/0.1/Document")
diff --git a/test/testDevel.py b/test/testDevel.py
index 4d6fadc1..e79a7313 100644
--- a/test/testDevel.py
+++ b/test/testDevel.py
@@ -2,12 +2,36 @@
from __future__ import unicode_literals
import sys, os
-from ferenda.compat import unittest
+
+import six
+from ferenda.compat import unittest, patch, call, MagicMock
if os.getcwd() not in sys.path: sys.path.insert(0,os.getcwd())
+from tempfile import mkstemp
+
from ferenda import Devel
class Main(unittest.TestCase):
+ def test_dumprdf(self):
+ fileno, tmpfile = mkstemp()
+ fp = os.fdopen(fileno, "w")
+        fp.write("""<?xml version="1.0" encoding="utf-8"?>
+        <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+                 xmlns:dct="http://purl.org/dc/terms/">
+          <rdf:Description rdf:about="http://example.org/doc">
+            <dct:title>Doc title</dct:title>
+          </rdf:Description></rdf:RDF>""")
+ fp.close()
+ d = Devel()
+ mock = MagicMock()
+ builtins = "__builtin__" if six.PY2 else "builtins"
+ with patch(builtins+'.print', mock):
+ d.dumprdf(tmpfile, format="nt")
+ self.assertTrue(mock.called)
+        want = '<http://example.org/doc> <http://purl.org/dc/terms/title> "Doc title" .\n\n'
+ mock.assert_has_calls([call(want)])
+
+
def test_parsestring(self):
d = Devel()
with self.assertRaises(NotImplementedError):
diff --git a/test/testDocStore.py b/test/testDocStore.py
index dd28402c..92f48501 100644
--- a/test/testDocStore.py
+++ b/test/testDocStore.py
@@ -157,7 +157,7 @@ def test_list_basefiles_file(self):
self.assertEqual(list(self.store.list_basefiles_for("parse")),
basefiles)
- def test_list_basefiles_dir(self):
+ def test_list_basefiles_parse_dir(self):
files = ["downloaded/123/a/index.html",
"downloaded/123/b/index.html",
"downloaded/124/a/index.html",
@@ -170,6 +170,19 @@ def test_list_basefiles_dir(self):
self.assertEqual(list(self.store.list_basefiles_for("parse")),
basefiles)
+ def test_list_basefiles_generate_dir(self):
+ files = ["parsed/123/a/index.xhtml",
+ "parsed/123/b/index.xhtml",
+ "parsed/124/a/index.xhtml",
+ "parsed/124/b/index.xhtml"]
+ basefiles = ["124/b", "124/a", "123/b", "123/a"]
+
+ self.store.storage_policy = "dir"
+ for f in files:
+ util.writefile(self.p(f),"nonempty")
+ self.assertEqual(list(self.store.list_basefiles_for("generate")),
+ basefiles)
+
def test_list_versions_file(self):
files = ["archive/downloaded/123/a/1.html",
"archive/downloaded/123/a/2.html",
diff --git a/test/testManager.py b/test/testManager.py
index a7340a2f..0d205afd 100644
--- a/test/testManager.py
+++ b/test/testManager.py
@@ -92,6 +92,15 @@ def mymethod(self, arg):
"""Frobnicate the bizbaz (alternate implementation)"""
if arg == "myarg":
return "yeah!"
+
+class staticmockclass3(staticmockclass):
+ """Yet another (overrides footer())"""
+ alias="staticmock3"
+ def footer(self):
+ return (("About", "http://example.org/about"),
+ ("Legal", "http://example.org/legal"),
+ ("Contact", "http://example.org/contact")
+ )
class API(unittest.TestCase):
"""Test cases for API level methods of the manager modules (functions
@@ -267,11 +276,8 @@ def test_makeresources(self):
got = manager.makeresources([test],self.tempdir+os.sep+'rsrc', combine=True)
# test7: test the footer() functionality
- from ferenda.sources.general import Static
- static = Static()
- for b in static.store.list_basefiles_for("parse"):
- static.parse(b)
- got = manager.makeresources([Static()], self.tempdir+os.sep+'rsrc')
+ test = staticmockclass3()
+ got = manager.makeresources([test], self.tempdir+os.sep+'rsrc')
tree = ET.parse(self.tempdir+os.sep+got['xml'][0])
footerlinks=tree.findall("footerlinks/nav/ul/li")
self.assertTrue(footerlinks)