diff --git a/ferenda/decorators.py b/ferenda/decorators.py index 85ebd89c..716c0139 100644 --- a/ferenda/decorators.py +++ b/ferenda/decorators.py @@ -25,7 +25,7 @@ try: from inspect import getfullargspec except ImportError: # py 2 doesn't have getfullargspec, use getargspec instead - from inspect import getargspec + from inspect import getargspec as getfullargspec from rdflib import Graph, URIRef from rdflib.compare import graph_diff diff --git a/ferenda/devel.py b/ferenda/devel.py index be18c8e9..7fc042a0 100644 --- a/ferenda/devel.py +++ b/ferenda/devel.py @@ -42,7 +42,7 @@ CompositeRepository, DocumentEntry, Transformer, RequestHandler, ResourceLoader) from ferenda.elements import serialize -from ferenda.elements.html import Body, P, H1, H2, Form, Textarea, Input, Label, Button, Textarea, Br, Div, A, Pre, Code, UL, LI +from ferenda.elements.html import Body, P, H1, H2, H3, Form, Textarea, Input, Label, Button, Textarea, Br, Div, A, Pre, Code, UL, LI from ferenda import decorators, util, manager class DummyStore(object): @@ -567,7 +567,7 @@ def analyze_buildstats(self, logfilename): output = StringIO() counters = defaultdict(Counter) msgloc = re.compile(" \([\w/]+.py:\d+\)").search - eventok = re.compile("[^ ]+: (download|parse|relate|generate) OK").match + eventok = re.compile("[^ ]+: (download|parse|relate|generate|transformlinks) OK").match with open(logfilename) as fp: for line in fp: try: @@ -582,10 +582,11 @@ def analyze_buildstats(self, logfilename): action = m.group(1) counters[action][module] += 1 sortkeys = defaultdict(int, - {"download": -4, - "parse": -3, - "relate": -2, - "generate": -1}) + {"download": -5, + "parse": -4, + "relate": -3, + "generate": -2, + "transformlinks": -1}) actions = sorted(counters.keys(), key=sortkeys.get) # maybe sort in a reasonable order? if actions: alength = max([len(a) for a in actions]) @@ -605,6 +606,15 @@ def analyze_buildstats(self, logfilename): def handle_logs(self, environ, params): logdir = self.repo.config.datadir + os.sep + "logs" + def elapsedtime(f): + with open(f) as fp: + first = fp.readline() + fp.seek(os.path.getsize(f) - 500) + last = fp.read().split("\n")[-2] + start = datetime.strptime(first.split(" ")[0], "%H:%M:%S") + end = datetime.strptime(last.split(" ")[0], "%H:%M:%S") + return end - start # FIXME: Handle wraparound + def firstline(f): with open(logdir+os.sep+f) as fp: # trim uninteresting things from start and end @@ -631,12 +641,15 @@ def linkelement(f): if not errorstats: errorstats = "[analyze_log didn't return any output?]" logcontents = util.readfile(logfilename) - + elapsed = elapsedtime(logfilename) return Body([ Div([H2([params['file']]), - P(["Log processed in %.3f s" % (time.time() - start)]), + P(["Log processed in %.3f s. The logged action took %.0f s." % (time.time() - start, elapsed.total_seconds())]), + H3(["Buildstats"]), Pre([buildstats]), + H3(["Errors"]), Pre([errorstats]), + H3(["Logs"]), Pre([logcontents], **{'class': 'logviewer'})])]) diff --git a/ferenda/documententry.py b/ferenda/documententry.py index 89c45fa7..a5ec777a 100644 --- a/ferenda/documententry.py +++ b/ferenda/documententry.py @@ -114,10 +114,7 @@ def __init__(self, path=None): 'indexed_ts', 'indexed_dep', 'indexed_ft', - 'status.download.date', - 'status.parse.date', - 'status.relate.date', - 'status.generate.date') + 'date') try: d = json.load(fp, object_hook=hook) except JSONDecodeError as e: @@ -274,7 +271,6 @@ def updateentry(f, section, entrypath, *args, **kwargs): is assumed to be the first element in args. """ - def clear(key, d): if key in d: del d[key] diff --git a/ferenda/documentrepository.py b/ferenda/documentrepository.py index 4a9084c4..8dc08fbb 100644 --- a/ferenda/documentrepository.py +++ b/ferenda/documentrepository.py @@ -2666,9 +2666,10 @@ def generated_url(self, basefile): # # STEP 4.5: After generating HTML, go through all links and # rewrite/transform them (we cannot do that as part of generate(), - # since the transform may depend on whether generated files exist - # on disk) + # since the transform may depend on whether other generated files + # exist on disk, to know whether keep links to them or not) @decorators.action + @decorators.ifneeded('transformlinks') def transformlinks(self, basefile, otherrepos=[]): """Transform links in generated HTML files. @@ -3617,6 +3618,16 @@ def get_status(self): :rtype: dict """ + # FIXME: + # * This needs to output data about whether relate or + # transformlinks needs to run (even though these actions don' + # result in new files). + # * It should use the logic provided by DocumentStore.needed, + # not calling outfile_is_newer et al on its own + # * Should be able to run with a single basefile, and explain + # the status of the different actions (ie generate needed + # because a dependency is newer than existing generated + # file) status = OrderedDict() exists = [] todo = [] diff --git a/ferenda/documentstore.py b/ferenda/documentstore.py index 745d0232..ffbf13a6 100644 --- a/ferenda/documentstore.py +++ b/ferenda/documentstore.py @@ -372,6 +372,17 @@ def needed(self, basefile, action): """ + def newer(filename, dt, field): + if not os.path.exists(filename): + return False + elif not dt: # has never been indexed + return Needed(reason="%s has not been processed" % filename) + else: + if datetime.fromtimestamp(os.stat(filename).st_mtime) > dt: + return Needed(reason="%s is newer than %s in documententry %s" % (filename, field, entry._path)) + else: + return False + # if this function is even called, it means that force is not # true (or ferenda-build.py has not been called with a single # basefile, which is an implied force) @@ -385,16 +396,6 @@ def needed(self, basefile, action): return False elif action == "relate": entry = DocumentEntry(self.documententry_path(basefile)) - def newer(filename, dt, field): - if not os.path.exists(filename): - return False - elif not dt: # has never been indexed - return Needed(reason="%s has not been processed" % filename) - else: - if datetime.fromtimestamp(os.stat(filename).st_mtime) > dt: - return Needed(reason="%s is newer than %s in documententry %s" % (filename, field, entry._path)) - else: - return False return RelateNeeded( fulltext=newer(self.parsed_path(basefile), entry.indexed_ft, 'indexed_ft'), @@ -418,7 +419,17 @@ def newer(filename, dt, field): outfile += ".404" newer = util.outfile_is_newer(dependencies, outfile) if not newer: - return Needed(reason = getattr(newer, 'reason', None)) + return Needed(reason=getattr(newer, 'reason', None)) + else: + return False + elif action == "transformlinks": + entry = DocumentEntry(self.documententry_path(basefile)) + infile = self.generated_path(basefile) + # if entry.status['generate']['date'] is older than the + # file modification date, something has modified the file + # after generate -- most likely a call to transformlinks() + if not newer(infile, entry.updated, 'updated'): + return Needed(reason="%s has not been modified after generate at %s" % (infile, entry.status['generate']['date'])) else: return False else: diff --git a/test/testDocEntry.py b/test/testDocEntry.py index 9ac4269e..69f791d9 100644 --- a/test/testDocEntry.py +++ b/test/testDocEntry.py @@ -67,6 +67,24 @@ class DocEntry(unittest.TestCase): "url": null }""" + status_json = """{ + "status": { + "download": { + "date": "2018-08-14T18:15:00" + }, + "parse": { + "date": "2018-08-14T18:16:00" + }, + "relate": { + "date": "2018-08-14T18:17:00" + }, + "generate": { + "date": "2018-08-14T18:18:00", + "not_a_date": "2018-08-14T18:18:00" + } + } +}""" + def setUp(self): self.maxDiff = None self.datadir = tempfile.mkdtemp() @@ -199,3 +217,16 @@ def test_guess_type(self): self.assertEqual(d.guess_type("test.xhtml"),"application/html+xml") self.assertEqual(d.guess_type("test.bin"), "application/octet-stream") + def test_load_status(self): + path = self.repo.store.documententry_path("123/a") + util.ensure_dir(path) + with open(path, "w") as fp: + fp.write(self.status_json) + d = DocumentEntry(path=path) + self.assertEqual(datetime(2018,8,14,18,15,00), d.status['download']['date']) + self.assertEqual(datetime(2018,8,14,18,16,00), d.status['parse']['date']) + self.assertEqual(datetime(2018,8,14,18,17,00), d.status['relate']['date']) + self.assertEqual(datetime(2018,8,14,18,18,00), d.status['generate']['date']) + self.assertEqual("2018-08-14T18:18:00", d.status['generate']['not_a_date']) + + diff --git a/test/testDocRepo.py b/test/testDocRepo.py index a11a7bac..dcc815a4 100644 --- a/test/testDocRepo.py +++ b/test/testDocRepo.py @@ -1532,6 +1532,9 @@ class OtherRepo(DocumentRepository): class Transformlinks(RepoTester): def setUp(self): + self.repo.config.force = True # needed now that we use the + # @ifneeded decorator on + # transformlinks() with self.repo.store.open_generated("1", "w") as fp: # this exact code might vary depending on how base.xsl # evolves, but we're really only interested in link diff --git a/test/testDocStore.py b/test/testDocStore.py index aed6588f..9917fec1 100644 --- a/test/testDocStore.py +++ b/test/testDocStore.py @@ -412,16 +412,19 @@ def test_parse_needed_outdated(self): self.assertTrue(res) self.assertIn("is newer than outfile", res.reason) - def create_entry(self, basefile, timestampoffset=0): # create a entry file with indexed_{ft,ts,dep} set to the - # current time with optional offset + # current time with optional offset. Also + # .status['generated']['date'], to test needed(..., + # 'transformlinks') de = DocumentEntry(self.store.documententry_path(basefile)) delta = timedelta(seconds=timestampoffset) ts = datetime.now() + delta de.indexed_ts = ts de.indexed_ft = ts de.indexed_dep = ts + de.updated = ts + de.status['generate'] = {'date': ts} de.save() def test_relate_not_needed(self): @@ -491,6 +494,19 @@ def test_generate_needed_outdated_dep(self): res = self.store.needed("a", "generate") self.assertTrue(res) self.assertIn("is newer than outfile", res.reason) + + def test_transformlinks_needed(self): + self.create_file(self.store.generated_path("a")) + self.create_entry("a") + res = self.store.needed("a", "transformlinks") + self.assertTrue(res) + self.assertIn("has not been modified after generate", res.reason) + + def test_transformlinks_not_needed(self): + self.create_entry("a", -3600) + self.create_file(self.store.generated_path("a")) + self.assertFalse(self.store.needed("a", "transformlinks")) + import doctest from ferenda import documentstore