ran autopep8 / autoflake
staffanm committed Sep 15, 2013
1 parent 6b4f24e commit ee8880c
Showing 60 changed files with 2,235 additions and 1,881 deletions.
12 changes: 7 additions & 5 deletions ferenda/citationparser.py
@@ -1,13 +1,13 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import pyparsing
import six

from ferenda.elements import LinkSubject, serialize
from ferenda.elements import LinkSubject


class CitationParser(object):

"""Finds citations to documents and other resources in text
strings. Each type of citation is specified by a
:py:mod:`pyparsing` grammar, and for each found citation a URI
@@ -71,7 +71,8 @@ def parse_string(self, string):
of (string, :py:class:`pyparsing.ParseResult`)
:rtype: list
"""
# Returns a list of strings and/or tuples, where each tuple is (string,pyparsing.ParseResult)
# Returns a list of strings and/or tuples, where each tuple is
# (string,pyparsing.ParseResult)
nodes = [string]
for grammar in self._grammars:
res = []
@@ -103,15 +104,16 @@ def parse_recursive(self, part):
:type part: list
:returns: a correspondingly nested structure.
:rtype: list"""

res = []
if not isinstance(part, six.text_type):
for subpart in part:
if isinstance(subpart, six.text_type):
res.extend(self.parse_recursive(subpart))
else:
res.append(self.parse_recursive(subpart))
part[:] = res[:]  # replace our existing subparts/children with the combined result of parse_recursive
# replace our existing subparts/children with the combined result of parse_recursive
part[:] = res[:]
return part

else: # ok, simple string
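As a usage illustration of the parse_string API described above, here is a minimal sketch. It assumes CitationParser is importable from the ferenda package and that its constructor accepts one or more pyparsing grammars (as the self._grammars attribute suggests); the "section" grammar and the sample text are invented for the example.

    import pyparsing
    from ferenda import CitationParser

    # A toy grammar: matches strings like "section 4" and names the number.
    section_citation = "section" + pyparsing.Word(pyparsing.nums)("secnumber")

    parser = CitationParser(section_citation)
    nodes = parser.parse_string("See section 4 and section 7 for details")
    for node in nodes:
        if isinstance(node, tuple):
            # (matched text, pyparsing.ParseResults) for each found citation
            text, result = node
            print("citation:", text, result.asDict())
        else:
            # plain text between citations is passed through unchanged
            print("text:", node)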
7 changes: 4 additions & 3 deletions ferenda/citationpatterns.py
@@ -7,7 +7,7 @@
from __future__ import unicode_literals
from pyparsing import *

################################################################
#
#
# ferenda.citationpatterns.url
#
@@ -34,13 +34,14 @@
'https://example.org/?key=value#fragment' (note: only the
schemes/protocols 'http', 'https' and 'ftp' are supported)"""

################################################################
#
#
# ferenda.citationpatterns.eulaw
#
LongYear = Word(nums, exact=4)
ShortYear = Word(nums, exact=2)
Month = oneOf("januari februari mars april maj juni juli augusti september oktober november december")
Month = oneOf(
"januari februari mars april maj juni juli augusti september oktober november december")
DayInMonth = Word(nums, max=2)
Date = (DayInMonth + Month + LongYear)

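To see what the url grammar described above actually matches, a short hedged sketch (the sample sentence is invented; scanString is standard pyparsing, and the example assumes the grammar is exposed as the module attribute url, as the comment header indicates):

    from ferenda.citationpatterns import url

    text = "read https://example.org/?key=value#fragment for background"
    for tokens, start, end in url.scanString(text):
        # tokens is a pyparsing.ParseResults covering the matched URL
        print(text[start:end])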
18 changes: 9 additions & 9 deletions ferenda/compositerepository.py
@@ -5,14 +5,15 @@

from . import DocumentRepository, DocumentStore


class CompositeStore(DocumentStore):

def __init__(self, datadir, downloaded_suffix=".html", storage_policy="file", docrepos=[]):
self.datadir = datadir # docrepo.datadir + docrepo.alias
self.datadir = datadir # docrepo.datadir + docrepo.alias
self.downloaded_suffix = downloaded_suffix
self.storage_policy = storage_policy
self.docrepos=docrepos
self.docrepos = docrepos

def list_basefiles_for(self, action, basedir=None):
if not basedir:
basedir = self.datadir
@@ -29,15 +30,15 @@ def list_basefiles_for(self, action, basedir=None):


class CompositeRepository(DocumentRepository):
subrepos = () # list of classes
subrepos = () # list of classes
documentstore_class = CompositeStore

_instances = {}

def get_instance(self, instanceclass, options={}):
if not instanceclass in self._instances:
inst = instanceclass(**options)
inst.config = self.config # FIXME: this'll override **options...
inst.config = self.config # FIXME: this'll override **options...
self._instances[instanceclass] = inst
return self._instances[instanceclass]

@@ -50,11 +51,11 @@ def __init__(self, **kwargs):
# finishes... The best fix from this class POV would be to
# have config be a (special) kwargs parameter, but that
# violates the DocumentRepository API...
self.store = self.documentstore_class(self.config.datadir+os.sep+self.alias,
self.store = self.documentstore_class(self.config.datadir + os.sep + self.alias,
downloaded_suffix=self.downloaded_suffix,
storage_policy=self.storage_policy,
docrepos=self._instances)

def download(self):
for c in self.subrepos:
inst = self.get_instance(c, self.myoptions)
@@ -71,13 +72,12 @@ def parse(self, basefile):
# each parse method should be smart about whether to re-parse
# or not (i.e. use the @managedparsing decorator)
ret = inst.parse(basefile)
except errors.ParseError: # or others
except errors.ParseError: # or others
ret = False
if ret:
break
if ret:
self.copy_parsed(basefile, inst)


def copy_parsed(self, basefile, instance):
# If the distilled and parsed links are recent, assume that
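For context, this is roughly how a CompositeRepository is meant to be declared: subrepos lists the repository classes to try in order, and parse() falls back to the next one when a subrepository raises a ParseError. RepoA and RepoB are placeholder names, and the sketch assumes both base classes are exported from the ferenda package (as the relative import above suggests).

    from ferenda import DocumentRepository, CompositeRepository

    class RepoA(DocumentRepository):
        alias = "repoA"       # hypothetical subrepository

    class RepoB(DocumentRepository):
        alias = "repoB"       # hypothetical fallback subrepository

    class CombinedRepo(CompositeRepository):
        alias = "combined"
        # parse(basefile) asks RepoA first; on ParseError it tries RepoB,
        # then copies whichever parsed result succeeded (copy_parsed).
        subrepos = (RepoA, RepoB)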
33 changes: 18 additions & 15 deletions ferenda/decorators.py
@@ -16,13 +16,13 @@
from datetime import datetime

import six
from six import text_type as str
from rdflib import Graph, URIRef

from ferenda import util
from ferenda import LayeredConfig
from ferenda.errors import DocumentRemovedError, ParseError


def timed(f):
"""Automatically log a statement of how long the function call takes"""
@functools.wraps(f)
@@ -82,48 +82,49 @@ def render(f):
with :py:func:`~ferenda.decorators.makedocument`."""
def iterate_graphs(node):
res = []
if hasattr(node,'meta') and node.meta is not None:
if hasattr(node, 'meta') and node.meta is not None:
res.append(node.meta)
for subnode in node:
if not isinstance(subnode,six.string_types):
if not isinstance(subnode, six.string_types):
res.extend(iterate_graphs(subnode))
return res

@functools.wraps(f)
def wrapper(self, doc):
ret = f(self, doc)
updated = self.render_xhtml(doc, self.store.parsed_path(doc.basefile))
if updated:
self.log.debug("%s: Created %s" % (doc.basefile,self.store.parsed_path(doc.basefile)))
self.log.debug("%s: Created %s" % (doc.basefile, self.store.parsed_path(doc.basefile)))

# css file + background images + png renderings of text
self.create_external_resources(doc)

# Check to see that all metadata contained in doc.meta is
# present in the serialized file.
distilled_graph = Graph()
with codecs.open(self.store.parsed_path(doc.basefile), encoding="utf-8") as fp: # unicode

with codecs.open(self.store.parsed_path(doc.basefile), encoding="utf-8") as fp: # unicode
distilled_graph.parse(data=fp.read(), format="rdfa", publicID=doc.uri)
# The act of parsing from RDFa binds a lot of namespaces
# in the graph in an unnecessary manner. Particularly it
# binds both 'dc' and 'dcterms' to
# 'http://purl.org/dc/terms/', which makes serialization
# less than predictable. Blow these prefixes away.
distilled_graph.bind("dc", URIRef("http://purl.org/dc/elements/1.1/"))
distilled_graph.bind("dcterms", URIRef("http://example.org/this-prefix-should-not-be-used"))

distilled_graph.bind(
"dcterms", URIRef("http://example.org/this-prefix-should-not-be-used"))

util.ensure_dir(self.store.distilled_path(doc.basefile))
with open(self.store.distilled_path(doc.basefile), "wb") as distilled_file:
#print("============distilled===============")
#print(distilled_graph.serialize(format="turtle").decode('utf-8'))
# print("============distilled===============")
# print(distilled_graph.serialize(format="turtle").decode('utf-8'))
distilled_graph.serialize(distilled_file, format="pretty-xml")
self.log.debug(
'%s: %s triples extracted to %s', doc.basefile, len(distilled_graph), self.store.distilled_path(doc.basefile))

for g in iterate_graphs(doc.body):
doc.meta += g

for triple in distilled_graph:
# len_before = len(doc.meta)
doc.meta.remove(triple)
@@ -156,14 +157,14 @@ def wrapper(self, doc):
except ParseError as e:
self.log.error("%s: ParseError %s", doc.basefile, e)
if (hasattr(self.config, 'fatalexceptions') and
self.config.fatalexceptions):
self.config.fatalexceptions):
raise
else:
return False
except:
self.log.exception("parse of %s failed", doc.basefile)
if (hasattr(self.config, 'fatalexceptions') and
self.config.fatalexceptions):
self.config.fatalexceptions):
raise
else:
return False
@@ -185,17 +186,19 @@ def managedparsing(f):
(:py:func:`~ferenda.decorators.makedocument`, :py:func:`~ferenda.decorators.parseifneeded`, :py:func:`~ferenda.decorators.timed`, :py:func:`~ferenda.decorators.render`)"""
return makedocument(
parseifneeded(
#handleerror( # is this really a good idea?
# handleerror( # is this really a good idea?
timed(
render(f))))


def action(f):
"""Decorator that marks a class or instance method as runnable by
:py:func:`ferenda.manager.run`
"""
f.runnable = True
return f


def downloadmax(f):
"""Makes any generator respect the ``downloadmax`` config parameter.
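The downloadmax docstring is cut off above, so as a rough idea of the technique (a sketch of the general approach, not ferenda's actual implementation): a decorator can cap any generator at a configured number of items.

    import functools
    import itertools

    def downloadmax_sketch(f):
        """Illustrative only: truncate the wrapped generator after
        self.config.downloadmax items, if that parameter is set."""
        @functools.wraps(f)
        def wrapper(self, *args, **kwargs):
            gen = f(self, *args, **kwargs)
            limit = getattr(self.config, "downloadmax", None)
            return itertools.islice(gen, int(limit)) if limit else gen
        return wrapper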
9 changes: 6 additions & 3 deletions ferenda/describer.py
@@ -1,10 +1,13 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from rdflib.extras.describer import Describer as OrigDescriber
from rdflib import URIRef, Literal, RDF, Graph
from rdflib import URIRef
from rdflib import Literal
from rdflib import RDF


class Describer(OrigDescriber):

"""Extends the utility class
:py:class:`rdflib.extras.describer.Describer` so that it reads
values and references as well as writes them.
@@ -51,7 +54,7 @@ def getrdftype(self):
:rtype: string
"""
return self.getrel(RDF.type)

def getvalue(self, p):
"""Get a single literal value for the given property and the
current subject. If the graph contains zero or more than one
@@ -77,7 +80,7 @@ def getvalue(self, p):
raise KeyError("No objects for predicate %s" % p)
elif len(values) > 1:
raise KeyError("More than one object for predicatee %s" % p)
return values[0]
return values[0]

def getrel(self, p):
"""Get a single URI for the given property and the current
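A hedged usage sketch of the extended Describer (the graph contents and URIs are invented; getvalue and getrel behave as the docstrings above describe, each returning the single matching object or raising KeyError):

    from rdflib import Graph, URIRef, Literal
    from rdflib.namespace import DCTERMS
    from ferenda import Describer

    g = Graph()
    doc = URIRef("http://example.org/doc/1")
    g.add((doc, DCTERMS.title, Literal("An example document")))
    g.add((doc, DCTERMS.publisher, URIRef("http://example.org/org/acme")))

    d = Describer(g, doc)
    print(d.getvalue(DCTERMS.title))      # the single title literal
    print(d.getrel(DCTERMS.publisher))    # the single publisher URI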
