Skip to content

Commit

Permalink
misc fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
staffanm committed Feb 4, 2018
1 parent 9716634 commit 5a4d0d9
Show file tree
Hide file tree
Showing 14 changed files with 95 additions and 71 deletions.
3 changes: 2 additions & 1 deletion ferenda/documentrepository.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,8 @@ def lookup_resource(self, label, predicate=FOAF.name, cutoff=0.8, warn=True):

fuzz = difflib.get_close_matches(label, resources.keys(), 1, cutoff)
if fuzz:
if warn:
# even if we want warnings, we don't want warnings for case changes
if warn and label.lower() != fuzz[0].lower():
self.log.warning("Assuming that '%s' should be '%s'?" %
(label, fuzz[0]))
return URIRef(resources[fuzz[0]])
Expand Down
35 changes: 20 additions & 15 deletions ferenda/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -1228,6 +1228,7 @@ def _build_worker(jobqueue, resultqueue, clientname):
setproctitle(proctitle)
log.debug("Client: [pid %s] %s finished: %s" % (os.getpid(), job['basefile'], res))
outdict = {'basefile': job['basefile'],
'alias': job['alias'],
'result': res,
'log': list(logrecords),
'client': clientname}
Expand Down Expand Up @@ -1335,6 +1336,8 @@ def __queue_jobs_nomanager(jobqueue, iterable, inst, classname, command):


def _queue_jobs(manager, iterable, inst, classname, command):
def format_tupleset(s):
return ", ".join(("%s:%s" % (t[0], t[1])) for t in s)
jobqueue = manager.jobqueue()
resultqueue = manager.resultqueue()
log = getlog()
Expand All @@ -1359,33 +1362,35 @@ def _queue_jobs(manager, iterable, inst, classname, command):
'config': client_config}
# print("putting %r into jobqueue" % job)
jobqueue.put(job)
processing.add(basefile)
number_of_jobs = idx + 1
processing.add((inst.alias,basefile))
res = []
if number_of_jobs == 0:
numres = 0
if len(processing) == 0:
return res
log.info("Server: Put %s (%s) jobs into job queue" % (number_of_jobs, len(processing)))
log.info("%s: Put %s jobs into job queue" % (inst.alias, len(processing)))
# FIXME: only one of the clients will read this DONE package, and
# we have no real way of knowing how many clients there will be
# (they can come and go at will). Didn't think this one through...
jobqueue.put("DONE")
numres = 0
# jobqueue.put("DONE")
res = []
clients = Counter()
signal.signal(signal.SIGALRM, _resultqueue_get_timeout)
# FIXME: be smart about how long we wait before timing out the resultqueue.get() call
timeout_length = 180
while numres < number_of_jobs:
timeout_length = 300
while len(processing) > 0:
try:
r = resultqueue.get()
except TimeoutError:
log.critical("Timeout: %s jobs not processed (%s)" % (len(processing), ", ".join(processing)))
numres = number_of_jobs
log.critical("Timeout: %s jobs not processed (%s)" % (len(processing), format_tupleset(processing)))
processing.clear()
continue
signal.alarm(timeout_length)
if r['basefile'] not in processing:
log.warning("%s not found in processing (%s)" % (r['basefile'], ", ".join(processing)))
processing.discard(r['basefile'])
if (r['alias'], r['basefile']) not in processing:
if r['alias'] == inst.alias:
log.warning("%s not found in processing (%s)" % (r['basefile'], format_tupleset(processing)))
else:
log.warning("%s from repo %s was straggling, better late than never" % (r['basefile'], r['alias']))
processing.discard((r['alias'], r['basefile']))
if isinstance(r['result'], tuple) and r['result'][0] == _WrappedKeyboardInterrupt:
raise KeyboardInterrupt()
elif isinstance(r['result'], tuple) and isinstance(r['result'][0], Exception):
Expand All @@ -1407,15 +1412,15 @@ def _queue_jobs(manager, iterable, inst, classname, command):
r)
if 'client' in r:
clients[r['client']] += 1
if 'result' in r:
if 'result' in r and r['alias'] == inst.alias:
res.append(r['result'])
numres += 1

# ok, now we don't need to worry about timeouts anymore
signal.alarm(0)
# sort clients on name, not number of jobs
clientstats = ", ".join(["%s: %s jobs" % (k, v) for k,v in sorted(clients.items())])
log.info("Server: %s jobs processed. %s" % (numres, clientstats))
log.info("%s: %s jobs processed. %s" % (inst.alias, numres, clientstats))
return res
# sleep(1)
# don't shut this down --- the toplevel manager.run call must do
Expand Down
4 changes: 3 additions & 1 deletion ferenda/sources/legal/se/elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,8 +639,10 @@ def as_xhtml(self, uri, parent_uri=None):
# do not add @property='dcterms:title' as we don't want to
# create a RDF triple out of this. But we kind of have to set
# rdf:type = bibo:DocumentPart (to make xsl/forarbete.xsl
# create proper TOC)
# create proper TOC). And _relate_fulltext_value_label is
# surprised if we lack title
element.set('content', self.title)
element.set('property', 'dcterms:title')
element.set('typeof', 'bibo:DocumentPart')
return element

Expand Down
2 changes: 1 addition & 1 deletion ferenda/sources/legal/se/offtryck.py
Original file line number Diff line number Diff line change
Expand Up @@ -1523,7 +1523,7 @@ def is_appendix_header(chunk):
if txt.startswith("Bilaga "):
# assume that whatever follows is a number -- if
# not, this is not a proper appendix header anyway
return int(re.split(r"[ \:]")[1])
return int(re.split(r"[ \:]", txt)[1])
else:
return True

Expand Down
6 changes: 6 additions & 0 deletions ferenda/sources/legal/se/propositioner.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,14 @@ def prop_sanitize_identifier(identifier):
identifier = identifier.replace("PROP", "Prop")
if identifier.startswith("Prop "):
identifier = identifier.replace("Prop ", "Prop. ")
if "\xa0" in identifier: # Non-breakable space
identifier = identifier.replace("\xa0", " ")
if not identifier.startswith("Prop. "):
identifier = "Prop. " + identifier
# identify and correct the not-uncommon "2009/2010:87" pattern (should be 2009/10:87)
m = re.search(r"(\d{4})/(\d{4}):(\d+)$", identifier)
if m and m.group(2) != "2000" and int(m.group(1)) == int(m.group(2)) - 1:
identifier = identifier.replace(m.group(2), m.group(2)[-2:])
if not re.match(r"^Prop\. (19|20)\d{2}(|/\d{2}|/2000):[1-9]\d*$", identifier):
raise ValueError("Irregular identifier %s" % identifier)
return Literal(identifier)
Expand Down
9 changes: 8 additions & 1 deletion ferenda/sources/legal/se/regeringen.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,14 @@ def extract_metadata(self, rawhead, basefile):
# the longest possible id, "Prop. 1999/2000:123", is 19 chars
if len(title) < 20 and title.endswith(basefile):
identifier = title
title = "" # FIXME: hunt for title amongst the PDF file links
title = ""
# maybe the real title is hiding in the ingress of the page?
alttitle = content.find("p", "ingress")
if alttitle:
alttitle = alttitle.text.strip()
# some basic heuristics to determine if this is likely to be a title
if alttitle.startswith("Tilläggsdirektiv") or len(alttitle) > 120:
title = alttitle
else:
identifier_node = content.find("span", "h1-vignette")
if identifier_node:
Expand Down
16 changes: 2 additions & 14 deletions ferenda/sources/legal/se/riksdagen.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from .fixedlayoutsource import FixedLayoutSource, FixedLayoutStore
from . import Offtryck
from .legalref import LegalRef
from .swedishlegalsource import Lazyfile


class RiksdagenStore(FixedLayoutStore):
Expand Down Expand Up @@ -284,20 +285,7 @@ def lazy_downloaded_to_intermediate(basefile):
os.path.getsize(intermediate_path) / (1024*1024)))
return res

class lazyfile(object):
def __init__(self, constructor):
self.constructor = constructor
self.fp = None
self.patchdescription = None
self.closed = True

def __getattr__(self, name):
if self.fp is None:
self.fp = self.constructor()
self.patchdescription = self.fp.patchdescription
return getattr(self.fp, name)

return lazyfile(partial(lazy_downloaded_to_intermediate, basefile))
return Lazyfile(partial(lazy_downloaded_to_intermediate, basefile))


def metadata_from_basefile(self, basefile):
Expand Down
28 changes: 18 additions & 10 deletions ferenda/sources/legal/se/sfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -754,8 +754,13 @@ def extract_metadata_register(self, soup, basefile):
identifier = util.ucfirst(str(node))
if identifier.startswith("Prop"):
from .propositioner import prop_sanitize_identifier
identifier = prop_sanitize_identifier(identifier)
d[node.uri] = {"dcterms:identifier": identifier}
try:
identifier = prop_sanitize_identifier(identifier)
except ValueError:
self.log.warning("%s: Could not sanitize irregular identifier %s" % (basefile, identifier))
identifier = None
if identifier:
d[node.uri] = {"dcterms:identifier": identifier}
elif key == 'CELEX-nr':
for celex in re.findall('3\d{2,4}[LR]\d{4}', val):
b = BNode()
Expand Down Expand Up @@ -1014,14 +1019,17 @@ def postprocess_doc(self, doc):
graph.add(triple)
doc.meta.remove(triple)
if p.identifier == RPUBL.forarbete:
triple = (o, DCTERMS.identifier,
doc.meta.value(o, DCTERMS.identifier))
graph.add(triple)
trash.add(triple)
triple = (o, RDF.type,
doc.meta.value(o, RDF.type))
graph.add(triple)
trash.add(triple)
forarb_identifier = doc.meta.value(o, DCTERMS.identifier)
if forarb_identifier: # not always the case, eg if the forarbete had an irregular identifier
triple = (o, DCTERMS.identifier,
forarb_identifier)
graph.add(triple)
trash.add(triple)
forarb_type = doc.meta.value(o, RDF.type)
if forarb_type:
triple = (o, RDF.type, forarb_type)
graph.add(triple)
trash.add(triple)
elif p.identifier == RPUBL.genomforDirektiv:
triple = (o, RPUBL.celexNummer,
doc.meta.value(o, RPUBL.celexNummer))
Expand Down
5 changes: 3 additions & 2 deletions ferenda/sources/legal/se/sou.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
from ferenda import util, decorators
from ferenda.pdfreader import StreamingPDFReader
from . import Regeringen, SwedishLegalSource, FixedLayoutSource, SwedishLegalStore, Offtryck, RPUBL

from .swedishlegalsource import lazyread

def sou_sanitize_identifier(identifier):
if not re.match("SOU (19|20)\d{2}:[1-9]\d*", identifier):
if not re.match("SOU (19|20)\d{2}:[1-9]\d*$", identifier):
raise ValueError("Irregular identifier %s (after mangling)" % identifier)
return Literal(identifier)

Expand Down Expand Up @@ -282,6 +282,7 @@ def metadata_from_basefile(self, basefile):
attrib["rpubl:utrSerie"] = self.lookup_resource("SOU", SKOS.altLabel)
return attrib

@lazyread
def downloaded_to_intermediate(self, basefile):
intermediate_path = self.store.intermediate_path(basefile)
intermediate_dir = os.path.dirname(intermediate_path)
Expand Down
24 changes: 24 additions & 0 deletions ferenda/sources/legal/se/swedishlegalsource.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from bz2 import BZ2File
from datetime import datetime, date
from functools import partial, wraps
from io import BytesIO, StringIO, BufferedIOBase
from urllib.parse import quote, unquote
from wsgiref.util import request_uri
Expand Down Expand Up @@ -80,7 +81,30 @@ def __new__(cls, *args, **kwargs):
def __bool__(self):
return False

class Lazyfile(object):
    """File-like proxy that defers creating the real file object.

    ``constructor`` is a zero-argument callable producing the real
    file-like object.  It is only invoked on the first attribute access
    that falls through to ``__getattr__`` -- until then no work is done.
    """

    def __init__(self, constructor):
        # Zero-arg callable that creates the real fp on demand.
        self.constructor = constructor
        # The real file object; stays None until the first delegated
        # attribute access triggers lazy construction.
        self.fp = None
        # Mirrored from the real fp once it is opened (the real fp is
        # expected to carry a .patchdescription attribute -- TODO confirm
        # against the constructors passed in by callers).
        self.patchdescription = None
        # NOTE(review): set here and never flipped when the real fp is
        # opened; attributes assigned in __init__ shadow __getattr__, so
        # .closed always reads True -- confirm this is intentional.
        self.closed = True

    def __getattr__(self, name):
        # __getattr__ only fires for attributes NOT already set on the
        # instance, so accessing e.g. .read lazily builds the real file.
        if self.fp is None:
            self.fp = self.constructor()
            self.patchdescription = self.fp.patchdescription
        return getattr(self.fp, name)


def lazyread(f):
    """Decorator deferring the wrapped call until its result is used.

    The wrapped method is not executed when called; instead a
    ``Lazyfile`` proxy is returned, and ``f(self, basefile)`` only runs
    once someone accesses an attribute (e.g. reads) on that proxy.
    """
    @wraps(f)
    def wrapper(self, basefile):
        return Lazyfile(partial(f, self, basefile))
    return wrapper

class SwedishLegalHandler(RequestHandler):
def supports(self, environ):
pathinfo = environ['PATH_INFO']
Expand Down
10 changes: 0 additions & 10 deletions lagen/nu/res/patches/sfs/patches/1974/152.patch

This file was deleted.

20 changes: 6 additions & 14 deletions lagen/nu/res/patches/sfs/patches/1999/1229.patch
Original file line number Diff line number Diff line change
@@ -1,30 +1,30 @@
--- data/sfs/intermediate/1999/1229.txt.bz2
+++
@@ -15354,6 +15354,7 @@ Korrigerad formattering av strecksatslistor i 44:12-13, styckeindelning i 44:21 samt tabell i 63:3
@@ -15176,6 +15176,7 @@ Korrigerad formattering av strecksatslistor i 44:12-13, styckeindelning i 44:21 samt tabell i 63:3
underliggande tillgångar eller av kursindex eller liknande.

12 § Med option avses en rätt för innehavaren att
+
- förvärva eller avyttra aktier, obligationer eller andra
tillgångar till ett bestämt pris, eller

@@ -15365,6 +15366,7 @@
@@ -15187,6 +15188,7 @@
Huvudregel

13 § Kapitalvinsten ska beräknas som skillnaden mellan
+
- ersättningen för den avyttrade tillgången eller för den
utfärdade förpliktelsen minskad med utgifterna för avyttringen
eller utfärdandet, och
@@ -15433,6 +15435,7 @@
@@ -15255,6 +15257,7 @@
avyttrats vid tidpunkten för utnyttjandet. Lag (2007:1419).

Tillgångar som förvärvas genom arv, gåva m.m.
+
21 § Om en tillgång förvärvas genom arv, testamente, gåva,
bodelning eller på liknande sätt, inträder förvärvaren i den
tidigare ägarens skattemässiga situation.
@@ -20879,15 +20882,19 @@
@@ -20610,15 +20613,19 @@
procent av det belopp med vilket
den fastställda förvärvsinkomsten
överstiger 0,99 prisbasbelopp
Expand All @@ -43,16 +43,8 @@
+
Lag (2011:1256).

3 a § /Upphör att gälla U:2018-01-01/
@@ -20933,6 +20940,7 @@

överstiger 12,43
prisbasbelopp 0,422 prisbasbelopp
+
Lag (2015:775).

3 a § /Träder i kraft I:2018-01-01/
@@ -20987,6 +20995,7 @@
3 a § För dem som vid beskattningsårets ingång har fyllt 65
@@ -20672,6 +20679,7 @@

överstiger 12,43
prisbasbelopp 0,422 prisbasbelopp
Expand Down
2 changes: 1 addition & 1 deletion test/integrationLagen.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class TestLagen(unittest.TestCase, FerendaTestCase):

def assert_status(self, url, code):
res = requests.get(url, headers={'Accept': 'text/html'})
self.assertEqual(res.status_code, code)
self.assertEqual(res.status_code, code, url)
return res

def assert200(self, url):
Expand Down
2 changes: 1 addition & 1 deletion tools/nginx.conf
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ server {
rewrite_log on;
charset utf-8;
charset_types text/html text/xml;

client_max_body_size 4m;
location @uwsgi {
uwsgi_pass unix:///tmp/ferenda.sock;
include /etc/nginx/uwsgi_params;
Expand Down

0 comments on commit 5a4d0d9

Please sign in to comment.