diff --git a/ferenda/compat.py b/ferenda/compat.py
index e6207b86..a29e6073 100644
--- a/ferenda/compat.py
+++ b/ferenda/compat.py
@@ -21,6 +21,6 @@ import unittest
 try:
-    from unittest.mock import Mock, patch, call
+    from unittest.mock import Mock, MagicMock, patch, call
 except ImportError:  # pragma: no cover
-    from mock import Mock, patch, call
+    from mock import Mock, MagicMock, patch, call
diff --git a/ferenda/compositerepository.py b/ferenda/compositerepository.py
index 57c56236..98573dac 100644
--- a/ferenda/compositerepository.py
+++ b/ferenda/compositerepository.py
@@ -3,29 +3,34 @@
 import os

-from . import DocumentRepository, DocumentStore
-
+from ferenda import DocumentRepository, DocumentStore
+from ferenda import util, errors

 class CompositeStore(DocumentStore):

-    def __init__(self, datadir, downloaded_suffix=".html", storage_policy="file", docrepos=[]):
+    def __init__(self, datadir, downloaded_suffix=".html",
+                 storage_policy="file",
+                 docrepo_instances=None):
         self.datadir = datadir  # docrepo.datadir + docrepo.alias
         self.downloaded_suffix = downloaded_suffix
         self.storage_policy = storage_policy
-        self.docrepos = docrepos
+        if not docrepo_instances:
+            docrepo_instances = {}
+        self.docrepo_instances = docrepo_instances

     def list_basefiles_for(self, action, basedir=None):
         if not basedir:
             basedir = self.datadir
         if action == "parse":
             documents = set()
-            for inst in self.docrepos:
+            # assert self.docrepo_instances, "No docrepos are defined!"
+            for cls, inst in self.docrepo_instances.items():
                 for basefile in inst.store.list_basefiles_for("parse"):
                     if basefile not in documents:
                         documents.add(basefile)
                         yield basefile
         else:
-            for basefile in inst.store.list_basefiles_for(action):
+            for basefile in super(CompositeStore, self).list_basefiles_for(action):
                 yield basefile
@@ -54,58 +59,61 @@ def __init__(self, **kwargs):
         self.store = self.documentstore_class(self.config.datadir + os.sep + self.alias,
                                               downloaded_suffix=self.downloaded_suffix,
                                               storage_policy=self.storage_policy,
-                                              docrepos=self._instances)
+                                              docrepo_instances=self._instances)

     def download(self):
         for c in self.subrepos:
             inst = self.get_instance(c, self.myoptions)
+            # make sure that our store has access to our now
+            # initialized subrepo objects
+            if c not in self.store.docrepo_instances:
+                self.store.docrepo_instances[c] = inst
             inst.download()

     # NOTE: this impl should NOT use the @managedparsing decorator
     def parse(self, basefile):
-        start = time()
-        self.log.debug("%s: Starting", basefile)
-        ret = False
-        for c in self.subrepos:
-            inst = self.get_instance(c, self.myoptions)
-            try:
-                # each parse method should be smart about whether to re-parse
-                # or not (i.e. use the @managedparsing decorator)
-                ret = inst.parse(basefile)
-            except errors.ParseError:  # or others
-                ret = False
+        with util.logtime(self.log.info, "%(basefile)s OK (%(elapsed).3f sec)",
+                          {'basefile': basefile}):
+            ret = False
+            for c in self.subrepos:
+                inst = self.get_instance(c, self.myoptions)
+                try:
+                    # each parse method should be smart about whether to re-parse
+                    # or not (i.e. use the @managedparsing decorator)
+                    ret = inst.parse(basefile)
+                except errors.ParseError:  # or others
+                    ret = False
+                if ret:
+                    break
             if ret:
-                break
-        if ret:
-            self.copy_parsed(basefile, inst)
+                self.copy_parsed(basefile, inst)
+        return ret

     def copy_parsed(self, basefile, instance):
         # If the distilled and parsed links are recent, assume that
         # all external resources are OK as well
-        if (util.outfile_is_newer([instance.distilled_path(basefile)],
-                                  self.distilled_path(basefile)) and
-            util.outfile_is_newer([instance.parsed_path(basefile)],
-                                  self.parsed_path(basefile))):
-            self.log.debug(
-                "%s: External resources are (probably) up-to-date" % basefile)
+        if (util.outfile_is_newer([instance.store.distilled_path(basefile)],
+                                  self.store.distilled_path(basefile)) and
+            util.outfile_is_newer([instance.store.parsed_path(basefile)],
+                                  self.store.parsed_path(basefile))):
+            self.log.debug("%s: Attachments are (likely) up-to-date" % basefile)
             return

+        util.link_or_copy(instance.store.distilled_path(basefile),
+                          self.store.distilled_path(basefile))
+
+        util.link_or_copy(instance.store.parsed_path(basefile),
+                          self.store.parsed_path(basefile))
+
         cnt = 0
-        for attachment in instance.store.list_attachments(doc.basefile, "parsed"):
+        for attachment in instance.store.list_attachments(basefile, "parsed"):
             cnt += 1
-            src = instance.store.parser_path(basename, attachment=attachment)
-            target = self.store.parsed_path(basename, attachment=attachment)
+            src = instance.store.parsed_path(basefile, attachment=attachment)
+            target = self.store.parsed_path(basefile, attachment=attachment)
             util.link_or_copy(src, target)
-
-        util.link_or_copy(instance.distilled_path(basefile),
-                          self.distilled_path(basefile))
-
-        util.link_or_copy(instance.parsed_path(basefile),
-                          self.parsed_path(basefile))
-
         if cnt:
-            self.log.debug("%s: Linked %s external resources from %s to %s" %
+            self.log.debug("%s: Linked %s attachments from %s to %s" %
                            (basefile, cnt,
-                            os.path.dirname(instance.parsed_path(basefile)),
-                            os.path.dirname(self.parsed_path(basefile))))
+                            os.path.dirname(instance.store.parsed_path(basefile)),
+                            os.path.dirname(self.store.parsed_path(basefile))))
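
The parse() logic above boils down to: ask each subrepo in declared order, let the first one that returns True win, and copy its output into the composite store. A minimal standalone sketch of that control flow (plain Python for illustration only; the classes and ParseError here are stand-ins, not ferenda code):

    class ParseError(Exception):
        pass

    class SubrepoA(object):
        def parse(self, basefile):
            return False               # cannot handle this basefile

    class SubrepoB(object):
        def parse(self, basefile):
            return True                # produces parsed output for this basefile

    def composite_parse(subrepos, basefile):
        ret = False
        for cls in subrepos:           # subrepos are tried in declared order
            inst = cls()
            try:
                ret = inst.parse(basefile)
            except ParseError:
                ret = False
            if ret:
                break                  # first successful subrepo wins
        return ret

    print(composite_parse([SubrepoB, SubrepoA], "1"))   # True -- SubrepoB handled it
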
diff --git a/ferenda/decorators.py b/ferenda/decorators.py
index 69888c92..eb8c8cfe 100644
--- a/ferenda/decorators.py
+++ b/ferenda/decorators.py
@@ -76,8 +76,16 @@ def wrapper(self, doc):

 def render(f):
     """Handles the serialization of the :py:class:`~ferenda.Document`
-object to XHTML+RDFa and RDF/XML files. Must be used in conjunction
-with :py:func:`~ferenda.decorators.makedocument`."""
+    object to XHTML+RDFa and RDF/XML files. Must be used in
+    conjunction with :py:func:`~ferenda.decorators.makedocument`.
+
+    """
+    # NOTE: The actual rendering is two lines of code. The bulk of
+    # this function validates that the XHTML+RDFa file that we end up
+    # with contains the exact same triples as is present in the doc
+    # object (including both the doc.meta Graph and any other Graph
+    # that might be present on any doc.body object)
+
     def iterate_graphs(node):
         res = []
         if hasattr(node, 'meta') and node.meta is not None:
@@ -97,12 +105,15 @@ def wrapper(self, doc):
         # css file + background images + png renderings of text
         self.create_external_resources(doc)

-        # Check to see that all metadata contained in doc.meta is
-        # present in the serialized file.
+        # Validate that all triples specified in doc.meta and any
+        # .meta property on any body object is present in the
+        # XHTML+RDFa file.
         distilled_graph = Graph()
-        with codecs.open(self.store.parsed_path(doc.basefile), encoding="utf-8") as fp:  # unicode
-            distilled_graph.parse(data=fp.read(), format="rdfa", publicID=doc.uri)
+        with codecs.open(self.store.parsed_path(doc.basefile),
+                         encoding="utf-8") as fp:  # unicode
+            distilled_graph.parse(data=fp.read(), format="rdfa",
+                                  publicID=doc.uri)
         # The act of parsing from RDFa binds a lot of namespaces
         # in the graph in an unneccesary manner. Particularly it
         # binds both 'dc' and 'dcterms' to
@@ -110,15 +121,18 @@
         # less than predictable. Blow these prefixes away.
         distilled_graph.bind("dc", URIRef("http://purl.org/dc/elements/1.1/"))
         distilled_graph.bind(
-            "dcterms", URIRef("http://example.org/this-prefix-should-not-be-used"))
+            "dcterms",
+            URIRef("http://example.org/this-prefix-should-not-be-used"))

         util.ensure_dir(self.store.distilled_path(doc.basefile))
-        with open(self.store.distilled_path(doc.basefile), "wb") as distilled_file:
+        with open(self.store.distilled_path(doc.basefile),
+                  "wb") as distilled_file:
             # print("============distilled===============")
             # print(distilled_graph.serialize(format="turtle").decode('utf-8'))
             distilled_graph.serialize(distilled_file, format="pretty-xml")
         self.log.debug(
-            '%s: %s triples extracted to %s', doc.basefile, len(distilled_graph), self.store.distilled_path(doc.basefile))
+            '%s: %s triples extracted to %s', doc.basefile,
+            len(distilled_graph), self.store.distilled_path(doc.basefile))

         for g in iterate_graphs(doc.body):
             doc.meta += g
diff --git a/ferenda/describer.py b/ferenda/describer.py
index 8eed9989..96b9301f 100644
--- a/ferenda/describer.py
+++ b/ferenda/describer.py
@@ -77,9 +77,9 @@ def getvalue(self, p):
         """
         values = list(self.getvalues(p))
         if len(values) == 0:
-            raise KeyError("No objects for predicate %s" % p)
+            raise KeyError("No values for predicate %s" % p)
         elif len(values) > 1:
-            raise KeyError("More than one object for predicatee %s" % p)
+            raise KeyError("More than one value for predicate %s" % p)
         return values[0]

     def getrel(self, p):
@@ -94,7 +94,7 @@ def getrel(self, p):
         """
         refs = list(self.getrels(p))
         if len(refs) == 0:
-            raise KeyError("No objects for predicate %s" + p)
+            raise KeyError("No objects for predicate %s" % p)
         elif len(refs) > 1:
-            raise KeyError("More than one object for predicatee %s" + p)
+            raise KeyError("More than one object for predicate %s" % p)
         return refs[0]
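
The comment added to render() above states that the bulk of the decorator validates that the serialized XHTML+RDFa file still carries every triple present on the doc object. The same kind of round-trip check can be sketched with plain rdflib (an illustration of the idea using rdflib.compare, not the decorator's actual code):

    from rdflib import Graph, Literal, Namespace, URIRef
    from rdflib.compare import isomorphic

    DCT = Namespace("http://purl.org/dc/terms/")
    doc_uri = URIRef("http://example.org/doc")

    meta = Graph()
    meta.add((doc_uri, DCT.title, Literal("Hello world", lang="en")))

    # serialize the metadata, re-parse the result, and verify nothing was lost
    distilled = Graph()
    distilled.parse(data=meta.serialize(format="xml"), format="xml")
    assert isomorphic(meta, distilled), "triples lost in the round-trip"
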
diff --git a/ferenda/devel.py b/ferenda/devel.py
index 2c951263..b9bb449b 100644
--- a/ferenda/devel.py
+++ b/ferenda/devel.py
@@ -29,22 +29,6 @@ class Devel(object):
     """
     alias = "devel"

-    # FIXME: manager.py should not strictly require these to be present
-
-    class DummyStore(object):
-
-        def __init__(self, path, **kwargs):
-            pass
-
-        def list_basefiles_for(self, action, basedir=None):
-            return []
-
-    downloaded_suffix = ".html"
-    storage_policy = "file"
-    documentstore_class = DummyStore
-
-    # Don't document this -- just needed for ferenda.manager compatibility
-    def get_default_options(self):
-        return {}

     @decorators.action
     def dumprdf(self, filename, format="turtle"):
@@ -309,34 +293,52 @@ def select(self, template, uri, format="json"):
             p['triples'] = len(res)
             print(res.serialize(format=format).decode('utf-8'))
+
+    # FIXME: These are dummy implementations of methods and class
+    # variables that manager.py expects all docrepos to have. We don't
+    # want to have coverage counting these as missing lines, hence the
+    # pragma: no cover comments.
+
+    class DummyStore(object):
+
+        def __init__(self, path, **kwargs):
+            pass  # pragma: no cover
+
+        def list_basefiles_for(self, action, basedir=None):
+            return []  # pragma: no cover
+
+    documentstore_class = DummyStore
+    downloaded_suffix = ".html"
+    storage_policy = "file"
+
+    def get_default_options(self):
+        return {}  # pragma: no cover
+
     def download(self):
-        pass
+        pass  # pragma: no cover

     def parse(self, basefile):
-        pass
+        pass  # pragma: no cover

     def relate(self, basefile):
-        pass
+        pass  # pragma: no cover

     def generate(self, basefile):
-        pass
+        pass  # pragma: no cover

     def toc(self, otherrepos):
-        pass
+        pass  # pragma: no cover

     def news(self, otherrepos):
-        pass
+        pass  # pragma: no cover

     def status(self):
-        pass
-
-    def list_basefiles_for(self, command):
-        return []
+        pass  # pragma: no cover

     @classmethod
     def setup(cls, action, config):
-        pass
+        pass  # pragma: no cover

     @classmethod
     def teardown(cls, action, config):
-        pass
+        pass  # pragma: no cover
diff --git a/ferenda/documentstore.py b/ferenda/documentstore.py
index 3248292c..ecbd41ad 100644
--- a/ferenda/documentstore.py
+++ b/ferenda/documentstore.py
@@ -217,7 +217,10 @@ def list_basefiles_for(self, action, basedir=None):
             suffix = ".rdf"
         elif action == "generate":
             directory = os.path.sep.join((basedir, "parsed"))
-            suffix = ".xhtml"
+            if self.storage_policy == "dir":
+                suffix = "index.xhtml"
+            else:
+                suffix = ".xhtml"
         elif action == "news":
             directory = os.path.sep.join((basedir, "entries"))
             suffix = ".json"
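
The documentstore.py change above matters because a repo with storage_policy="dir" keeps each document in its own directory, so the file that marks a parsed document is .../basefile/index.xhtml rather than .../basefile.xhtml. A small sketch of the resulting mapping (posix-style example paths, not ferenda API calls):

    def basefile_from_parsed_path(path, directory, storage_policy):
        # mirrors the suffix selection added above (sketch only)
        suffix = "index.xhtml" if storage_policy == "dir" else ".xhtml"
        assert path.startswith(directory + "/") and path.endswith(suffix)
        return path[len(directory) + 1:-len(suffix)].rstrip("/")

    print(basefile_from_parsed_path("parsed/123/a.xhtml", "parsed", "file"))       # 123/a
    print(basefile_from_parsed_path("parsed/123/a/index.xhtml", "parsed", "dir"))  # 123/a
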
diff --git a/ferenda/sources/general/wiki.py b/ferenda/sources/general/wiki.py
index 93f5e615..a3607454 100644
--- a/ferenda/sources/general/wiki.py
+++ b/ferenda/sources/general/wiki.py
@@ -13,7 +13,7 @@
 # mine
 from ferenda import DocumentRepository
 from ferenda import util
-from ferenda.legalref import LegalRef, Link
+# from ferenda.legalref import LegalRef, Link

 # FIXME: Need to dynamically set this namespace (by inspecting the root?)
 # as it varies with MW version
diff --git a/ferenda/sources/legal/eu/eurlexcaselaw.py b/ferenda/sources/legal/eu/eurlexcaselaw.py
index 223e4ef9..cb7f3cf8 100644
--- a/ferenda/sources/legal/eu/eurlexcaselaw.py
+++ b/ferenda/sources/legal/eu/eurlexcaselaw.py
@@ -7,7 +7,7 @@
 from rdflib import Graph

 from ferenda import DocumentRepository
-from ferenda.legalref import LegalRef
+from ferenda.sources.legal.se.legalref import LegalRef
 from ferenda.elements import Paragraph

 # FIXME: 2008.json, containing a handful of cases, some which should not be fetched, and one continuation link.
diff --git a/ferenda/sources/legal/se/dv.py b/ferenda/sources/legal/se/dv.py
index cf79f5f9..a6d22325 100755
--- a/ferenda/sources/legal/se/dv.py
+++ b/ferenda/sources/legal/se/dv.py
@@ -25,7 +25,7 @@
 from ferenda import DocumentStore, Describer, WordReader
 from ferenda.decorators import managedparsing
 from ferenda import util
-from ferenda.legalref import LegalRef, Link
+from ferenda.sources.legal.se.legalref import LegalRef, Link
 from ferenda.elements import Body, Paragraph
 from . import SwedishLegalSource, RPUBL
diff --git a/ferenda/sources/legal/se/jk.py b/ferenda/sources/legal/se/jk.py
index 032695e9..10f5af8a 100644
--- a/ferenda/sources/legal/se/jk.py
+++ b/ferenda/sources/legal/se/jk.py
@@ -16,7 +16,7 @@
 from .swedishlegalsource import Stycke, Sektion
 from ferenda.decorators import downloadmax, recordlastdownload
 from ferenda import util
-from ferenda.legalref import LegalRef, Link
+from ferenda.sources.legal.se.legalref import LegalRef, Link


 class JK(SwedishLegalSource):
diff --git a/ferenda/legalref.py b/ferenda/sources/legal/se/legalref.py
similarity index 98%
rename from ferenda/legalref.py
rename to ferenda/sources/legal/se/legalref.py
index fb9ead53..6fc0bb26 100755
--- a/ferenda/legalref.py
+++ b/ferenda/sources/legal/se/legalref.py
@@ -12,7 +12,7 @@
 # 3rdparty libs

 # needed early
-from . import util
+from ferenda import util

 external_simpleparse_state = None
 try:
@@ -131,8 +131,8 @@ def tag(text, tagtable, sliceleft, sliceright):

 # my own libraries
-from .elements import Link
-from .elements import LinkSubject
+from ferenda.elements import Link
+from ferenda.elements import LinkSubject

 # The charset used for the bytestrings that is sent to/from
 # simpleparse (which does not handle unicode)
@@ -243,26 +243,19 @@ def __init__(self, *args):
         else:
             scriptdir = os.path.dirname(__file__)

-        #n3file = os.path.sep.join([scriptdir,"etc","sfs-extra.n3"])
-        #n3url = "file://" + n3file.replace("\\","/")
-
-        # print "scriptdir: %s" % scriptdir
-        # print "n3file: %s" % n3file
-        # print "n3url: %s" % n3url
-
         self.graph = Graph()
-        n3file = os.path.relpath(scriptdir + "/res/etc/sfs-extra.n3")
+        n3file = os.path.relpath(scriptdir + "/../../../res/etc/sfs-extra.n3")
         # print "loading n3file %s" % n3file
         self.graph.load(n3file, format="n3")
         self.roots = []
         self.uriformatter = {}
         self.decl = ""  # try to make it unicode clean all the way
         self.namedlaws = {}
-        self.load_ebnf(scriptdir + "/res/etc/base.ebnf")
+        self.load_ebnf(scriptdir + "/../../../res/etc/base.ebnf")
         self.args = args
         if self.LAGRUM in args:
-            productions = self.load_ebnf(scriptdir + "/res/etc/lagrum.ebnf")
+            productions = self.load_ebnf(scriptdir + "/../../../res/etc/lagrum.ebnf")
             for p in productions:
                 self.uriformatter[p] = self.sfs_format_uri
             self.namedlaws.update(self.get_relations(RDFS.label))
@@ -274,10 +267,10 @@ def __init__(self, *args):
             # nu, eftersom kortlagrum.ebnf beror på produktioner som
             # definerats där
             if not self.LAGRUM in args:
-                self.load_ebnf(scriptdir + "/res/etc/lagrum.ebnf")
+                self.load_ebnf(scriptdir + "/../../../res/etc/lagrum.ebnf")

             productions = self.load_ebnf(
-                scriptdir + "/res/etc/kortlagrum.ebnf")
+                scriptdir + "/../../../res/etc/kortlagrum.ebnf")
             for p in productions:
                 self.uriformatter[p] = self.sfs_format_uri
             DCT = Namespace("http://purl.org/dc/terms/")
@@ -294,23 +287,23 @@ def __init__(self, *args):
             self.roots.insert(0, "kortlagrumref")

         if self.EGLAGSTIFTNING in args:
-            productions = self.load_ebnf(scriptdir + "/res/etc/eglag.ebnf")
+            productions = self.load_ebnf(scriptdir + "/../../../res/etc/eglag.ebnf")
             for p in productions:
                 self.uriformatter[p] = self.eglag_format_uri
             self.roots.append("eglagref")
         if self.FORARBETEN in args:
             productions = self.load_ebnf(
-                scriptdir + "/res/etc/forarbeten.ebnf")
+                scriptdir + "/../../../res/etc/forarbeten.ebnf")
             for p in productions:
                 self.uriformatter[p] = self.forarbete_format_uri
             self.roots.append("forarbeteref")
         if self.RATTSFALL in args:
-            productions = self.load_ebnf(scriptdir + "/res/etc/rattsfall.ebnf")
+            productions = self.load_ebnf(scriptdir + "/../../../res/etc/rattsfall.ebnf")
             for p in productions:
                 self.uriformatter[p] = self.rattsfall_format_uri
             self.roots.append("rattsfallref")
         if self.EGRATTSFALL in args:
-            productions = self.load_ebnf(scriptdir + "/res/etc/egratt.ebnf")
+            productions = self.load_ebnf(scriptdir + "/../../../res/etc/egratt.ebnf")
             for p in productions:
                 self.uriformatter[p] = self.egrattsfall_format_uri
             self.roots.append("ecjcaseref")
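
Since legalref.py now lives three package levels below the ferenda package root, its bundled resources are reached by climbing back up with "/../../../". A quick check of how such a path normalizes (stdlib only; the directory literals are example values):

    import os

    scriptdir = "ferenda/sources/legal/se"   # what os.path.dirname(__file__) yields for the moved module
    n3file = os.path.relpath(scriptdir + "/../../../res/etc/sfs-extra.n3")
    print(n3file)                            # ferenda/res/etc/sfs-extra.n3 (on posix)
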
diff --git a/ferenda/legaluri.py b/ferenda/sources/legal/se/legaluri.py
similarity index 98%
rename from ferenda/legaluri.py
rename to ferenda/sources/legal/se/legaluri.py
index 336454f5..afde9b1a 100644
--- a/ferenda/legaluri.py
+++ b/ferenda/sources/legal/se/legaluri.py
@@ -18,8 +18,8 @@

 # my own libraries
-from .legalref import LegalRef
-from . import util
+from ferenda.sources.legal.se.legalref import LegalRef
+from ferenda import util

 RPUBL = Namespace('http://rinfo.lagrummet.se/ns/2008/11/rinfo/publ#')
 RINFOEX = Namespace("http://lagen.nu/terms#")
diff --git a/ferenda/sources/legal/se/myndfskr.py b/ferenda/sources/legal/se/myndfskr.py
index af471cfc..c5e09cd1 100644
--- a/ferenda/sources/legal/se/myndfskr.py
+++ b/ferenda/sources/legal/se/myndfskr.py
@@ -14,7 +14,7 @@
 import six

 from ferenda import TextReader
-from ferenda.legalref import LegalRef
+from ferenda.sources.legal.se.legalref import LegalRef
 from ferenda import util
 from . import SwedishLegalSource
diff --git a/ferenda/sources/legal/se/sfs.py b/ferenda/sources/legal/se/sfs.py
index e8e1e1f5..c075222a 100755
--- a/ferenda/sources/legal/se/sfs.py
+++ b/ferenda/sources/legal/se/sfs.py
@@ -35,14 +35,14 @@
 from ferenda import DocumentEntry, DocumentStore
 from ferenda import TextReader, Describer
 from ferenda import decorators
-from ferenda import legaluri
+from ferenda.sources.legal.se import legaluri
 from ferenda import util, LayeredConfig
 from ferenda.elements import CompoundElement
 from ferenda.elements import OrdinalElement
 from ferenda.elements import TemporalElement
 from ferenda.elements import UnicodeElement
 from ferenda.errors import DocumentRemovedError, ParseError
-from ferenda.legalref import LegalRef, LinkSubject
+from ferenda.sources.legal.se.legalref import LegalRef, LinkSubject

 E = ElementMaker(namespace="http://www.w3.org/1999/xhtml")
 # Objektmodellen för en författning är uppbyggd av massa byggstenar
diff --git a/ferenda/util.py b/ferenda/util.py
index cf51b24d..f8076e33 100755
--- a/ferenda/util.py
+++ b/ferenda/util.py
@@ -365,7 +365,7 @@ def link_or_copy(src, dst):
         # The semantics of symlink are not identical to copy. The
         # source must be relative to the dstination, not relative to
         # cwd at creation time.
-        relsrc = os.relpath(src, os.path.dirname(dst))
+        relsrc = os.path.relpath(src, os.path.dirname(dst))
         os.symlink(relsrc, dst)
     else:
         copy_if_different(src, dst)
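
The util.py fix swaps the non-existent os.relpath for os.path.relpath. Computing the link target relative to the destination's directory (rather than to the current working directory) keeps the resulting symlink valid no matter where the process was started from; a minimal sketch with made-up example paths:

    import os

    src = "data/b/parsed/1/index.xhtml"
    dst = "data/composite/parsed/1/index.xhtml"
    relsrc = os.path.relpath(src, os.path.dirname(dst))
    print(relsrc)   # ../../../b/parsed/1/index.xhtml
    # os.symlink(relsrc, dst) then records a target relative to dst's own
    # directory, so the link survives relocating the whole data tree.
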
diff --git a/test/testCompositeRepo.py b/test/testCompositeRepo.py
index eea6905f..97e9bd64 100644
--- a/test/testCompositeRepo.py
+++ b/test/testCompositeRepo.py
@@ -4,25 +4,52 @@
 import sys, os
 if os.getcwd() not in sys.path:
     sys.path.insert(0,os.getcwd())

-from ferenda.testutil import RepoTester, DocumentRepository, util
-from ferenda.compat import unittest
+from ferenda import DocumentRepository, util, errors
+from ferenda.testutil import RepoTester

 #SUT
 from ferenda import CompositeRepository

 class SubrepoA(DocumentRepository):
+    storage_policy = "dir"
     alias= "a"
     def download(self, basefile=None):
         util.writefile(self.store.downloaded_path("1"), "basefile 1, repo a")

+    def parse(self, basefile):
+        if basefile == "1":
+            util.writefile(self.store.parsed_path("1"),
+                           "basefile 1, parsed by a")
+            util.writefile(self.store.parsed_path("1", attachment="extra.txt"),
+                           "attachment for basefile 1, parsed by a")
+            util.writefile(self.store.distilled_path("1"),
+                           "basefile 1, metadata from a")
+            return True
+        else:
+            return False  # we don't even have this basefile
+
 class SubrepoB(DocumentRepository):
+    storage_policy = "dir"
     alias= "b"
     def download(self, basefile=None):
         util.writefile(self.store.downloaded_path("1"), "basefile 1, repo b")
         util.writefile(self.store.downloaded_path("2"), "basefile 2, repo b")

+    def parse(self, basefile):
+        if basefile == "1":
+            util.writefile(self.store.parsed_path("1"),
+                           "basefile 1, parsed by b")
+            util.writefile(self.store.parsed_path("1", attachment="attach.txt"),
+                           "attachment for basefile 1, parsed by b")
+            util.writefile(self.store.distilled_path("1"),
+                           "basefile 1, metadata from b")
+            return True
+        else:
+            raise errors.ParseError("No can do!")
+
 class CompositeExample(CompositeRepository):
     subrepos = SubrepoB, SubrepoA
+    storage_policy = "dir"

 class TestComposite(RepoTester):
     repoclass = CompositeExample
@@ -30,18 +57,40 @@ class TestComposite(RepoTester):
     def test_download(self):
         self.repo.download()
         self.assertEqual("basefile 1, repo a",
-                         util.readfile(self.datadir+"/a/downloaded/1.html"))
+                         util.readfile(self.datadir+"/a/downloaded/1/index.html"))
         self.assertEqual("basefile 1, repo b",
-                         util.readfile(self.datadir+"/b/downloaded/1.html"))
+                         util.readfile(self.datadir+"/b/downloaded/1/index.html"))
         self.assertEqual("basefile 2, repo b",
-                         util.readfile(self.datadir+"/b/downloaded/2.html"))
+                         util.readfile(self.datadir+"/b/downloaded/2/index.html"))

-    @unittest.expectedFailure
     def test_list_basefiles_for(self):
         self.repo.download()
         # This doesn't work since self.repo.store.docrepos has
         # uninitialized classes, not objects
-        self.assertEqual(["1", "2"],
-                         list(self.repo.store.list_basefiles_for("parse")))
-
+        self.assertEqual(set(["2", "1"]),
+                         set(self.repo.store.list_basefiles_for("parse")))
+
+    def test_parse(self):
+        # we already know list_basefiles_for("parse") will return ["2", "1"]
+        self.assertTrue(self.repo.parse("1"))  # both A and B can handle this
+        # but B should win
+        self.assertEqual("basefile 1, parsed by b",
+                         util.readfile(self.repo.store.parsed_path("1")))
+        self.assertEqual("basefile 1, metadata from b",
+                         util.readfile(self.repo.store.distilled_path("1")))
+        self.assertTrue(["attach.txt"],
+                        self.repo.store.list_attachments("1", "parsed"))
+        self.assertFalse(self.repo.parse("2"))  # none can handle this
+
+        # in this case, all files should be up-to-date, so no copying
+        # should occur (triggering the "Attachments are (likely)
+        # up-to-date branch")
+        self.assertTrue(self.repo.parse("1"))
+
+        # and finally, list_basefiles_for("generate") should delegate
+        # to DocumentStore.list_basefiles_for
+        self.assertEqual(set(["1"]),
+                         set(self.repo.store.list_basefiles_for("generate")))
+
+
diff --git a/test/testDecorators.py b/test/testDecorators.py
index 11c71b8f..392a39d8 100644
--- a/test/testDecorators.py
+++ b/test/testDecorators.py
@@ -1,20 +1,14 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals
-import sys, os
-from ferenda.compat import unittest
+import sys, os, datetime
+from ferenda.compat import unittest, Mock, MagicMock, patch
 if os.getcwd() not in sys.path:
     sys.path.insert(0,os.getcwd())

-try:
-    # assume we're on py3.3 and fall back if not
-    from unittest.mock import Mock, MagicMock, patch
-except ImportError:
-    from mock import Mock, MagicMock, patch
-
 from ferenda import DocumentRepository, Document
 from ferenda.errors import DocumentRemovedError, ParseError

 # SUT
-from ferenda.decorators import timed, parseifneeded, render, handleerror, makedocument
+from ferenda.decorators import timed, parseifneeded, render, handleerror, makedocument, recordlastdownload, downloadmax

 class Decorators(unittest.TestCase):

@@ -99,8 +93,10 @@ def testfunc(repo,doc):
         mockrepo.store.distilled_path.return_value = "distilled_path.xhtml"
         mockrepo.get_globals.return_value = {'symbol table':'fake'}
-        mockdoc.meta = MagicMock()
-        mockdoc.body = []
+        mockdoc.meta = MagicMock()  # need Magicmock which supports magic funcs like __iter__
+        bodypart = MagicMock()
+        bodypart.meta = MagicMock()
+        mockdoc.body = [bodypart]
         mockdoc.meta.__iter__.return_value = []
         mockdoc.uri = "http://example.org/doc"
         with patch('ferenda.util.ensure_dir', return_value=True):
@@ -192,3 +188,36 @@ def testfunc(repo,doc):
         doc = testfunc(DocumentRepository(),"base/file")
         self.assertIsInstance(doc,Document)
         self.assertEqual(doc.basefile, "base/file")
+
+    def test_recordlastdownload(self):
+        @recordlastdownload
+        def testfunc(repo):
+            pass
+        mockrepo = Mock()
+        with patch('ferenda.decorators.LayeredConfig.write') as mockconf:
+            testfunc(mockrepo)
+            # check that config.lastdownload has been set to a datetime
+            self.assertIsInstance(mockrepo.config.lastdownload,
+                                  datetime.datetime)
+            # and that LayeredConfig.write has been called
+            self.assertTrue(mockconf.called)
+
+    def test_downloadmax(self):
+        @downloadmax
+        def testfunc(repo, source):
+            for x in range(100):
+                yield x
+        mockrepo = Mock()
+        mockrepo.config.downloadmax = None
+        self.assertEqual(100, len(list(testfunc(mockrepo, None))))
+
+        os.environ["FERENDA_DOWNLOADMAX"] = "10"
+        self.assertEqual(10, len(list(testfunc(mockrepo, None))))
+
+        del os.environ["FERENDA_DOWNLOADMAX"]
+        mockrepo.config.downloadmax = 20
+        self.assertEqual(20, len(list(testfunc(mockrepo, None))))
+
+
+
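
What test_downloadmax above exercises is an optional cap on a download generator, taken from repo.config.downloadmax or overridden by the FERENDA_DOWNLOADMAX environment variable. A rough sketch of such a decorator (an illustration of the behaviour under test, not ferenda's actual implementation):

    import itertools
    import os

    def downloadmax(f):
        def wrapper(repo, *args, **kwargs):
            # environment variable wins over the config setting; no limit means
            # the generator is passed through untouched
            limit = os.environ.get("FERENDA_DOWNLOADMAX") or repo.config.downloadmax
            generator = f(repo, *args, **kwargs)
            if limit:
                return itertools.islice(generator, int(limit))
            return generator
        return wrapper
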
diff --git a/test/testDescriber.py b/test/testDescriber.py
new file mode 100644
index 00000000..38b3940c
--- /dev/null
+++ b/test/testDescriber.py
@@ -0,0 +1,71 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import datetime
+
+from ferenda.compat import unittest
+
+from rdflib import Graph, Namespace
+
+# SUT
+from ferenda import Describer
+
+DCT = Namespace("http://purl.org/dc/terms/")
+FOAF = Namespace("http://xmlns.com/foaf/0.1/")
+
+class TestDescriber(unittest.TestCase):
+    def setUp(self):
+        self.graph = Graph()
+        self.graph.parse(data="""
+@prefix dct: <http://purl.org/dc/terms/> .
+@prefix foaf: <http://xmlns.com/foaf/0.1/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+<http://example.org/doc> a foaf:Document;
+        dct:title "Hello world"@en ;
+        dct:identifier "ID1",
+                       "ID2";
+        dct:issued "2013-10-11"^^xsd:date;
+        dct:references <http://example.org/doc2>;
+        dct:subject <http://example.org/concept1>,
+                    <http://example.org/concept2> .
+        """, format="turtle")
+        self.desc = Describer(self.graph, "http://example.org/doc")
+
+    def test_getvalues(self):
+        self.assertEqual(self.desc.getvalues(DCT.alternate),
+                         [])
+        self.assertEqual(self.desc.getvalues(DCT.title),
+                         ["Hello world"])
+        self.assertEqual(set(self.desc.getvalues(DCT.identifier)),
+                         set(["ID1", "ID2"]))
+
+    def test_getvalue(self):
+        self.assertEqual(self.desc.getvalue(DCT.title),
+                         "Hello world")
+        self.assertEqual(self.desc.getvalue(DCT.issued),
+                         datetime.date(2013,10,11))
+        with self.assertRaises(KeyError):
+            self.desc.getvalue(DCT.alternate)
+        with self.assertRaises(KeyError):
+            self.desc.getvalue(DCT.identifier)
+
+    def test_getrels(self):
+        self.assertEqual(self.desc.getrels(DCT.replaces),
+                         [])
+        self.assertEqual(self.desc.getrels(DCT.references),
+                         ["http://example.org/doc2"])
+        self.assertEqual(set(self.desc.getrels(DCT.subject)),
+                         set(["http://example.org/concept1",
+                              "http://example.org/concept2"]))
+
+    def test_getrel(self):
+        self.assertEqual(self.desc.getrel(DCT.references),
+                         "http://example.org/doc2")
+        with self.assertRaises(KeyError):
+            self.desc.getrel(DCT.replaces)
+        with self.assertRaises(KeyError):
+            self.desc.getrel(DCT.subject)
+
+    def test_getrdftype(self):
+        self.assertEqual(self.desc.getrdftype(),
+                         "http://xmlns.com/foaf/0.1/Document")
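
The getvalue()/getvalues() contract that these tests pin down, in one self-contained snippet (same public Describer API, but a throwaway graph; the predicate names are only examples):

    from rdflib import Graph, Namespace
    from ferenda import Describer

    DCT = Namespace("http://purl.org/dc/terms/")
    g = Graph()
    g.parse(data='<http://example.org/doc> <http://purl.org/dc/terms/title> "Hello world"@en .',
            format="turtle")
    d = Describer(g, "http://example.org/doc")

    print(d.getvalue(DCT.title))     # Hello world
    print(d.getvalues(DCT.creator))  # [] -- getvalues never raises
    try:
        d.getvalue(DCT.creator)      # zero values: getvalue raises KeyError
    except KeyError as e:
        print(e)
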
+ """) + fp.close() + d = Devel() + mock = MagicMock() + builtins = "__builtin__" if six.PY2 else "builtins" + with patch(builtins+'.print', mock): + d.dumprdf(tmpfile, format="nt") + self.assertTrue(mock.called) + want = ' "Doc title" .\n\n' + mock.assert_has_calls([call(want)]) + + def test_parsestring(self): d = Devel() with self.assertRaises(NotImplementedError): diff --git a/test/testDocStore.py b/test/testDocStore.py index dd28402c..92f48501 100644 --- a/test/testDocStore.py +++ b/test/testDocStore.py @@ -157,7 +157,7 @@ def test_list_basefiles_file(self): self.assertEqual(list(self.store.list_basefiles_for("parse")), basefiles) - def test_list_basefiles_dir(self): + def test_list_basefiles_parse_dir(self): files = ["downloaded/123/a/index.html", "downloaded/123/b/index.html", "downloaded/124/a/index.html", @@ -170,6 +170,19 @@ def test_list_basefiles_dir(self): self.assertEqual(list(self.store.list_basefiles_for("parse")), basefiles) + def test_list_basefiles_generate_dir(self): + files = ["parsed/123/a/index.xhtml", + "parsed/123/b/index.xhtml", + "parsed/124/a/index.xhtml", + "parsed/124/b/index.xhtml"] + basefiles = ["124/b", "124/a", "123/b", "123/a"] + + self.store.storage_policy = "dir" + for f in files: + util.writefile(self.p(f),"nonempty") + self.assertEqual(list(self.store.list_basefiles_for("generate")), + basefiles) + def test_list_versions_file(self): files = ["archive/downloaded/123/a/1.html", "archive/downloaded/123/a/2.html", diff --git a/test/testManager.py b/test/testManager.py index a7340a2f..0d205afd 100644 --- a/test/testManager.py +++ b/test/testManager.py @@ -92,6 +92,15 @@ def mymethod(self, arg): """Frobnicate the bizbaz (alternate implementation)""" if arg == "myarg": return "yeah!" + +class staticmockclass3(staticmockclass): + """Yet another (overrides footer())""" + alias="staticmock3" + def footer(self): + return (("About", "http://example.org/about"), + ("Legal", "http://example.org/legal"), + ("Contact", "http://example.org/contact") + ) class API(unittest.TestCase): """Test cases for API level methods of the manager modules (functions @@ -267,11 +276,8 @@ def test_makeresources(self): got = manager.makeresources([test],self.tempdir+os.sep+'rsrc', combine=True) # test7: test the footer() functionality - from ferenda.sources.general import Static - static = Static() - for b in static.store.list_basefiles_for("parse"): - static.parse(b) - got = manager.makeresources([Static()], self.tempdir+os.sep+'rsrc') + test = staticmockclass3() + got = manager.makeresources([test], self.tempdir+os.sep+'rsrc') tree = ET.parse(self.tempdir+os.sep+got['xml'][0]) footerlinks=tree.findall("footerlinks/nav/ul/li") self.assertTrue(footerlinks)