Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
206 lines (174 sloc) 7.5 KB
#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
"""This module constructs URIs for a document based on the properties
of that document. Alternatively, given a URI for a document, parse the
different properties for the document"""
# system libs
import sys
import re
from pprint import pformat
# 3rdparty libs
from rdflib import Literal, Namespace, URIRef, RDF, RDFS
try:
from rdflib.Graph import Graph
from rdflib.BNode import BNode
except ImportError:
from rdflib import Graph
from rdflib import BNode
# my own libraries
from FilebasedTester import FilebasedTester
from DispatchMixin import DispatchMixin
from LegalRef import LegalRef
import Util
# RDF vocabularies used below; the namespace URIs are looked up in Util.ns.
RINFO = Namespace(Util.ns['rinfo'])
RINFOEX = Namespace(Util.ns['rinfoex'])
DCT = Namespace(Util.ns['dct'])

# Translation table from the keys of the internal dictionaries that
# LegalRef constructs (keys which in turn are modelled after production
# rule names in the EBNF grammar) to the RDF predicate used for each value.
predicate = {
    "type":        RDF.type,
    "publikation": RINFO["rattsfallspublikation"],
    "artal":       RINFO["artal"],
    "lopnummer":   RINFO["publikationsordinal"],
    "sidnummer":   RINFO["sidnummer"],
    "law":         RINFO["fsNummer"],
    "chapter":     RINFOEX["kapitelnummer"],
    "section":     RINFOEX["paragrafnummer"],
    "piece":       RINFOEX["styckenummer"],
    "item":        RINFOEX["punktnummer"],
    "myndighet":   DCT["creator"],
    "dnr":         RINFO["diarienummer"],
}

# Reverse lookup: RDF predicate -> dictionary key.
dictkey = dict([(v, k) for k, v in predicate.items()])

# Document type identifiers from LegalRef and the RDF class used for each.
types = {
    LegalRef.RATTSFALL:        RINFO["Rattsfallsreferat"],
    LegalRef.LAGRUM:           RINFO["KonsolideradGrundforfattning"],
    LegalRef.MYNDIGHETSBESLUT: RINFO["Myndighetsavgorande"],
}

# Reverse lookup: RDF class -> LegalRef document type identifier.
dicttypes = dict([(v, k) for k, v in types.items()])
# Matchers (bound ``match`` methods of compiled regexes) that recognise
# each kind of document URI, keyed on the LegalRef document type. The named
# groups use the same keys as the ``predicate`` table so that a match's
# groupdict() can be turned directly into triples.
#
# The patterns are written as raw strings so that ``\w`` and ``\d`` reach
# the regex engine unchanged instead of being treated as (invalid) string
# escapes, and the dots of the host name are escaped so that they only
# match a literal '.'.
patterns = {
    LegalRef.RATTSFALL: re.compile(
        r"http://rinfo\.lagrummet\.se/publ/rattsfall/"
        r"(?P<publikation>\w+)/(?P<artal>\d+)"
        r"(s(?P<sidnummer>\d+)|((:| nr | ref )(?P<lopnummer>\d+)))").match,
    LegalRef.MYNDIGHETSBESLUT: re.compile(
        r"http://rinfo\.lagrummet\.se/publ/beslut/"
        r"(?P<myndighet>\w+)/(?P<dnr>.*)").match,
    LegalRef.LAGRUM: re.compile(
        r"http://rinfo\.lagrummet\.se/publ/sfs/"
        r"(?P<law>\d{4}:\w+)#?"
        r"(K(?P<chapter>[0-9a-z]+))?"
        r"(P(?P<section>[0-9a-z]+))?"
        r"(S(?P<piece>[0-9a-z]+))?"
        r"(N(?P<item>[0-9a-z]+))?").match,
}
# The dictionary should be a number of properties of the document we
# wish to construct the URI for, e.g.:
# {"type": LegalRef.RATTSFALL,
# "publikation": "nja",
# "artal": "2004",
# "sidnummer": "43"}
#
# The output is a URI string like 'http://rinfo.lagrummet.se/publ/rattsfall/nja/2004s43'
def construct(dictionary):
    """Build the URI for a document described by a property dictionary.

    Every entry of *dictionary* becomes one triple about a single fresh
    blank node: the "type" entry is translated through ``types`` into an
    rdf:type statement, and every other key is translated through
    ``predicate`` and stored as a literal. The resulting graph is handed
    to construct_from_graph() and its result is returned.

    A key missing from ``predicate`` (or a type missing from ``types``)
    raises KeyError from the table lookup.
    """
    node = BNode()
    graph = Graph()
    for key, value in dictionary.items():
        if key == "type":
            triple = (node, RDF.type, URIRef(types[value]))
        else:
            triple = (node, predicate[key], Literal(value))
        graph.add(triple)
    return construct_from_graph(graph)
def _first_obj(graph,subject,predicate):
l = list(graph.objects(subject,predicate))
if not l:
#raise ValueError("No objects with predicate %s found in the graph" % predicate)
return None
else:
return l[0]
def construct_from_graph(graph):
    """Build the URI for the document described by *graph*.

    Every triple in the graph is assumed to have the same blank node as
    subject. The rdf:type of that node selects the URI layout:

    * Rattsfallsreferat: ``.../publ/rattsfall/<publ>/<year>s<page>`` when
      the publication is "nja", otherwise
      ``.../publ/rattsfall/<publ>/<year>:<ordinal>``.
    * KonsolideradGrundforfattning: ``.../publ/sfs/<law number>``, followed
      by a '#' fragment with K/P/S/N-prefixed chapter, section, piece and
      item numbers for those that are present.
    * Myndighetsavgorande: ``.../publ/beslut/<creator>/<diarienummer>``.

    Values are fetched with _first_obj(), so a missing value shows up as
    the text "None" in the formatted URI.

    Raises ValueError for any other rdf:type. An empty graph fails with
    IndexError when the first triple is fetched.
    """
    # The subject of the first triple is used for every lookup.
    node = list(graph)[0][0]
    assert(isinstance(node, BNode))

    def value_of(pred):
        # First object for the given predicate on the document node.
        return _first_obj(graph, node, pred)

    rdftype = value_of(RDF.type)

    if rdftype == RINFO["Rattsfallsreferat"]:
        publ = value_of(RINFO["rattsfallspublikation"])
        year = value_of(RINFO["artal"])
        if publ == "nja":
            # This publication is identified by year and page number.
            tail = "%s/%ss%s" % (publ, year, value_of(RINFO["sidnummer"]))
        else:
            # The others are identified by year and ordinal.
            tail = "%s/%s:%s" % (publ, year,
                                 value_of(RINFO["publikationsordinal"]))
        return "http://rinfo.lagrummet.se/publ/rattsfall/%s" % tail

    if rdftype == RINFO["KonsolideradGrundforfattning"]:
        # Sub-document levels in the order they appear in the fragment,
        # each with the letter that introduces it.
        levels = ((RINFOEX["kapitelnummer"], 'K'),
                  (RINFOEX["paragrafnummer"], 'P'),
                  (RINFOEX["styckenummer"], 'S'),
                  (RINFOEX["punktnummer"], 'N'))
        fragment = value_of(RINFO["fsNummer"])
        for pred, sign in levels:
            number = value_of(pred)
            if number:
                # The '#' separator is added once, before the first level.
                if "#" not in fragment:
                    fragment += "#"
                fragment += sign + number
        return "http://rinfo.lagrummet.se/publ/sfs/%s" % fragment

    if rdftype == RINFO["Myndighetsavgorande"]:
        creator = value_of(DCT["creator"])
        dnr = value_of(RINFO["diarienummer"])
        return "http://rinfo.lagrummet.se/publ/beslut/%s/%s" % (creator, dnr)

    raise ValueError("Don't know how to construct a uri for %s" % rdftype)
def parse(uri):
    """Return the properties encoded in *uri* as a plain dictionary.

    The URI is first turned into a graph by parse_to_graph(). The rdf:type
    triple is mapped back to a document type through ``dicttypes`` and
    stored under "type"; every other triple is stored under the key that
    ``dictkey`` gives for its predicate, with the object converted to a
    unicode string.
    """
    properties = {}
    for _subject, pred, obj in parse_to_graph(uri):
        if pred == RDF.type:
            properties["type"] = dicttypes[obj]
            continue
        properties[dictkey[pred]] = unicode(obj)
    return properties
def parse_to_graph(uri):
    """Return a graph describing the document that *uri* identifies.

    The URI is tried against each matcher in ``patterns``; the named groups
    of the first one that matches, plus the matching document type, become
    the properties of a single blank node. Groups that did not take part in
    the match, and keys starting with an underscore, are left out. All
    namespaces in Util.ns are bound on the returned graph.

    Raises ValueError if no pattern matches.
    """
    for doctype, matcher in patterns.items():
        found = matcher(uri)
        if found:
            properties = found.groupdict()
            properties["type"] = doctype
            break
    else:
        # Loop ended without a break: nothing recognised the URI.
        raise ValueError("Can't parse URI %s" % uri)

    graph = Graph()
    for prefix, namespace_uri in Util.ns.items():
        graph.bind(prefix, Namespace(namespace_uri))

    node = BNode()
    for key, value in properties.items():
        if value is None or key.startswith("_"):
            continue
        if key == "type":
            graph.add((node, RDF.type, URIRef(types[value])))
        else:
            graph.add((node, predicate[key], Literal(value)))
    return graph
class Tester(FilebasedTester):
    """File-based tests for construct() and parse().

    Both tests use the same set of files in test/LegalURI; they only swap
    which extension holds the test data and which holds the expected
    answer, which gives a roundtrip test.
    """
    # NOTE(review): presumably read by FilebasedTester to locate the test
    # and answer files for each Test<Name> method -- confirm there.
    testparams = {
        'Parse': {
            'dir': u'test/LegalURI',
            'testext': '.txt',
            'answerext': '.py',
        },
        'Construct': {
            'dir': u'test/LegalURI',
            'testext': '.py',
            'answerext': '.txt',
        },
    }

    def TestConstruct(self, data):
        """Evaluate *data* as a property dictionary and return its URI."""
        # The test data is a Python expression; CRLF line breaks are
        # replaced by spaces before it is evaluated.
        expression = data.strip().replace("\r\n", " ")
        # All test case writers are honorable, noble and thorough persons,
        # but just in case, builtins are withheld from the expression.
        # NOTE(review): this narrows what the expression can reach but does
        # not make eval safe -- only run it on trusted test files.
        properties = eval(expression, {"__builtins__": None}, globals())
        return construct(properties)

    def TestParse(self, uri):
        """Parse *uri* and return the pretty-printed property dictionary."""
        return pformat(parse(uri.strip()))
if __name__ == "__main__":
    # Add DispatchMixin to Tester's base classes before creating the
    # instance, then hand the command-line arguments to its Dispatch().
    Tester.__bases__ = Tester.__bases__ + (DispatchMixin,)
    Tester().Dispatch(sys.argv)