Skip to content

Commit

Permalink
transformlinks is now only done when really needed
Browse files Browse the repository at this point in the history
  • Loading branch information
staffanm committed Aug 14, 2018
1 parent 946c933 commit 33bd4bf
Show file tree
Hide file tree
Showing 8 changed files with 110 additions and 29 deletions.
2 changes: 1 addition & 1 deletion ferenda/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
try:
from inspect import getfullargspec
except ImportError: # py 2 doesn't have getfullargspec, use getargspec instead
from inspect import getargspec
from inspect import getargspec as getfullargspec

from rdflib import Graph, URIRef
from rdflib.compare import graph_diff
Expand Down
29 changes: 21 additions & 8 deletions ferenda/devel.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
CompositeRepository, DocumentEntry, Transformer,
RequestHandler, ResourceLoader)
from ferenda.elements import serialize
from ferenda.elements.html import Body, P, H1, H2, Form, Textarea, Input, Label, Button, Textarea, Br, Div, A, Pre, Code, UL, LI
from ferenda.elements.html import Body, P, H1, H2, H3, Form, Textarea, Input, Label, Button, Textarea, Br, Div, A, Pre, Code, UL, LI
from ferenda import decorators, util, manager

class DummyStore(object):
Expand Down Expand Up @@ -567,7 +567,7 @@ def analyze_buildstats(self, logfilename):
output = StringIO()
counters = defaultdict(Counter)
msgloc = re.compile(" \([\w/]+.py:\d+\)").search
eventok = re.compile("[^ ]+: (download|parse|relate|generate) OK").match
eventok = re.compile("[^ ]+: (download|parse|relate|generate|transformlinks) OK").match
with open(logfilename) as fp:
for line in fp:
try:
Expand All @@ -582,10 +582,11 @@ def analyze_buildstats(self, logfilename):
action = m.group(1)
counters[action][module] += 1
sortkeys = defaultdict(int,
{"download": -4,
"parse": -3,
"relate": -2,
"generate": -1})
{"download": -5,
"parse": -4,
"relate": -3,
"generate": -2,
"transformlinks": -1})
actions = sorted(counters.keys(), key=sortkeys.get) # maybe sort in a reasonable order?
if actions:
alength = max([len(a) for a in actions])
Expand All @@ -605,6 +606,15 @@ def analyze_buildstats(self, logfilename):

def handle_logs(self, environ, params):
logdir = self.repo.config.datadir + os.sep + "logs"
def elapsedtime(f):
with open(f) as fp:
first = fp.readline()
fp.seek(os.path.getsize(f) - 500)
last = fp.read().split("\n")[-2]
start = datetime.strptime(first.split(" ")[0], "%H:%M:%S")
end = datetime.strptime(last.split(" ")[0], "%H:%M:%S")
return end - start # FIXME: Handle wraparound

def firstline(f):
with open(logdir+os.sep+f) as fp:
# trim uninteresting things from start and end
Expand All @@ -631,12 +641,15 @@ def linkelement(f):
if not errorstats:
errorstats = "[analyze_log didn't return any output?]"
logcontents = util.readfile(logfilename)

elapsed = elapsedtime(logfilename)
return Body([
Div([H2([params['file']]),
P(["Log processed in %.3f s" % (time.time() - start)]),
P(["Log processed in %.3f s. The logged action took %.0f s." % (time.time() - start, elapsed.total_seconds())]),
H3(["Buildstats"]),
Pre([buildstats]),
H3(["Errors"]),
Pre([errorstats]),
H3(["Logs"]),
Pre([logcontents], **{'class': 'logviewer'})])])


Expand Down
6 changes: 1 addition & 5 deletions ferenda/documententry.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,7 @@ def __init__(self, path=None):
'indexed_ts',
'indexed_dep',
'indexed_ft',
'status.download.date',
'status.parse.date',
'status.relate.date',
'status.generate.date')
'date')
try:
d = json.load(fp, object_hook=hook)
except JSONDecodeError as e:
Expand Down Expand Up @@ -274,7 +271,6 @@ def updateentry(f, section, entrypath, *args, **kwargs):
is assumed to be the first element in args.
"""

def clear(key, d):
if key in d:
del d[key]
Expand Down
15 changes: 13 additions & 2 deletions ferenda/documentrepository.py
Original file line number Diff line number Diff line change
Expand Up @@ -2666,9 +2666,10 @@ def generated_url(self, basefile):
#
# STEP 4.5: After generating HTML, go through all links and
# rewrite/transform them (we cannot do that as part of generate(),
# since the transform may depend on whether generated files exist
# on disk)
# since the transform may depend on whether other generated files
# exist on disk, to know whether keep links to them or not)
@decorators.action
@decorators.ifneeded('transformlinks')
def transformlinks(self, basefile, otherrepos=[]):
"""Transform links in generated HTML files.
Expand Down Expand Up @@ -3617,6 +3618,16 @@ def get_status(self):
:rtype: dict
"""
# FIXME:
# * This needs to output data about whether relate or
# transformlinks needs to run (even though these actions don'
# result in new files).
# * It should use the logic provided by DocumentStore.needed,
# not calling outfile_is_newer et al on its own
# * Should be able to run with a single basefile, and explain
# the status of the different actions (ie generate needed
# because a dependency is newer than existing generated
# file)
status = OrderedDict()
exists = []
todo = []
Expand Down
33 changes: 22 additions & 11 deletions ferenda/documentstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,17 @@ def needed(self, basefile, action):
"""

def newer(filename, dt, field):
if not os.path.exists(filename):
return False
elif not dt: # has never been indexed
return Needed(reason="%s has not been processed" % filename)
else:
if datetime.fromtimestamp(os.stat(filename).st_mtime) > dt:
return Needed(reason="%s is newer than %s in documententry %s" % (filename, field, entry._path))
else:
return False

# if this function is even called, it means that force is not
# true (or ferenda-build.py has not been called with a single
# basefile, which is an implied force)
Expand All @@ -385,16 +396,6 @@ def needed(self, basefile, action):
return False
elif action == "relate":
entry = DocumentEntry(self.documententry_path(basefile))
def newer(filename, dt, field):
if not os.path.exists(filename):
return False
elif not dt: # has never been indexed
return Needed(reason="%s has not been processed" % filename)
else:
if datetime.fromtimestamp(os.stat(filename).st_mtime) > dt:
return Needed(reason="%s is newer than %s in documententry %s" % (filename, field, entry._path))
else:
return False

return RelateNeeded(
fulltext=newer(self.parsed_path(basefile), entry.indexed_ft, 'indexed_ft'),
Expand All @@ -418,7 +419,17 @@ def newer(filename, dt, field):
outfile += ".404"
newer = util.outfile_is_newer(dependencies, outfile)
if not newer:
return Needed(reason = getattr(newer, 'reason', None))
return Needed(reason=getattr(newer, 'reason', None))
else:
return False
elif action == "transformlinks":
entry = DocumentEntry(self.documententry_path(basefile))
infile = self.generated_path(basefile)
# if entry.status['generate']['date'] is older than the
# file modification date, something has modified the file
# after generate -- most likely a call to transformlinks()
if not newer(infile, entry.updated, 'updated'):
return Needed(reason="%s has not been modified after generate at %s" % (infile, entry.status['generate']['date']))
else:
return False
else:
Expand Down
31 changes: 31 additions & 0 deletions test/testDocEntry.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,24 @@ class DocEntry(unittest.TestCase):
"url": null
}"""

status_json = """{
"status": {
"download": {
"date": "2018-08-14T18:15:00"
},
"parse": {
"date": "2018-08-14T18:16:00"
},
"relate": {
"date": "2018-08-14T18:17:00"
},
"generate": {
"date": "2018-08-14T18:18:00",
"not_a_date": "2018-08-14T18:18:00"
}
}
}"""

def setUp(self):
self.maxDiff = None
self.datadir = tempfile.mkdtemp()
Expand Down Expand Up @@ -199,3 +217,16 @@ def test_guess_type(self):
self.assertEqual(d.guess_type("test.xhtml"),"application/html+xml")
self.assertEqual(d.guess_type("test.bin"), "application/octet-stream")

def test_load_status(self):
path = self.repo.store.documententry_path("123/a")
util.ensure_dir(path)
with open(path, "w") as fp:
fp.write(self.status_json)
d = DocumentEntry(path=path)
self.assertEqual(datetime(2018,8,14,18,15,00), d.status['download']['date'])
self.assertEqual(datetime(2018,8,14,18,16,00), d.status['parse']['date'])
self.assertEqual(datetime(2018,8,14,18,17,00), d.status['relate']['date'])
self.assertEqual(datetime(2018,8,14,18,18,00), d.status['generate']['date'])
self.assertEqual("2018-08-14T18:18:00", d.status['generate']['not_a_date'])


3 changes: 3 additions & 0 deletions test/testDocRepo.py
Original file line number Diff line number Diff line change
Expand Up @@ -1532,6 +1532,9 @@ class OtherRepo(DocumentRepository):

class Transformlinks(RepoTester):
def setUp(self):
self.repo.config.force = True # needed now that we use the
# @ifneeded decorator on
# transformlinks()
with self.repo.store.open_generated("1", "w") as fp:
# this exact code might vary depending on how base.xsl
# evolves, but we're really only interested in link
Expand Down
20 changes: 18 additions & 2 deletions test/testDocStore.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,16 +412,19 @@ def test_parse_needed_outdated(self):
self.assertTrue(res)
self.assertIn("is newer than outfile", res.reason)


def create_entry(self, basefile, timestampoffset=0):
# create a entry file with indexed_{ft,ts,dep} set to the
# current time with optional offset
# current time with optional offset. Also
# .status['generated']['date'], to test needed(...,
# 'transformlinks')
de = DocumentEntry(self.store.documententry_path(basefile))
delta = timedelta(seconds=timestampoffset)
ts = datetime.now() + delta
de.indexed_ts = ts
de.indexed_ft = ts
de.indexed_dep = ts
de.updated = ts
de.status['generate'] = {'date': ts}
de.save()

def test_relate_not_needed(self):
Expand Down Expand Up @@ -491,6 +494,19 @@ def test_generate_needed_outdated_dep(self):
res = self.store.needed("a", "generate")
self.assertTrue(res)
self.assertIn("is newer than outfile", res.reason)

def test_transformlinks_needed(self):
self.create_file(self.store.generated_path("a"))
self.create_entry("a")
res = self.store.needed("a", "transformlinks")
self.assertTrue(res)
self.assertIn("has not been modified after generate", res.reason)

def test_transformlinks_not_needed(self):
self.create_entry("a", -3600)
self.create_file(self.store.generated_path("a"))
self.assertFalse(self.store.needed("a", "transformlinks"))


import doctest
from ferenda import documentstore
Expand Down

0 comments on commit 33bd4bf

Please sign in to comment.