Skip to content

Commit

Permalink
devel can now output a decent csv with selectable fields
Browse files: browse the repository at this point in the history
  • Loading branch information
staffanm committed Nov 22, 2018
1 parent 101a9f4 commit 9c93fed
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 13 deletions.
29 changes: 18 additions & 11 deletions ferenda/devel.py
Original file line number Diff line number Diff line change
Expand Up @@ -737,7 +737,7 @@ def dumpstore(self, format="turtle"):
# sub.debug('Sublog message at DEBUG level')

@decorators.action
def csvinventory(self, alias):
def csvinventory(self, alias, predicates=None):
"""Create an inventory of documents, as a CSV file.
Only documents that have been parsed and yielded some minimum
Expand All @@ -747,14 +747,18 @@ def csvinventory(self, alias):
:type alias: str
"""
predicates = ['basefile',
'subobjects', # sections that have rdf:type
'rdf:type',
'dcterms:identifier',
'dcterms:title',
'dcterms:published',
'prov:wasGeneratedBy',
]
if predicates is None:
predicates = ['basefile',
'subobjects', # sections that have rdf:type
'rdf:type',
'dcterms:identifier',
'dcterms:title',
'dcterms:published',
'prov:wasGeneratedBy',
]
else:
# predicates are given as a comma separated list, eg ./ferenda-build.py devel csvinventory kkv rpubl:malnummer,rpubl:avgorandedatum,rinfoex:instanstyp,rinfoex:domstol,rinfoex:upphandlande,rinfoex:leverantor,rinfoex:arendetyp,rinfoex:avgorande
predicates = predicates.split(",")
import csv
# if six.PY2:
# delimiter = b';'
Expand All @@ -771,7 +775,9 @@ def csvinventory(self, alias):
for basefile in repo.store.list_basefiles_for("relate"):
baseuri = URIRef(repo.canonical_uri(basefile))
with repo.store.open_distilled(basefile) as fp:
row = {'basefile': basefile}
row = {}
if 'basefile' in predicates:
row['basefile'] = basefile
g = Graph().parse(fp, format="xml")
for (p, o) in g.predicate_objects(baseuri):
qname = g.qname(p)
Expand All @@ -787,7 +793,8 @@ def csvinventory(self, alias):
# if six.PY2:
# fld = fld.encode("latin-1", errors="replace")
row[qname] = fld
row['subobjects'] = len(list(g.subject_objects(RDF.type)))
if 'subobjects' in predicates:
row['subobjects'] = len(list(g.subject_objects(RDF.type)))
writer.writerow(row)

def _repo_from_alias(self, alias, datadir=None, repoconfig=None):
Expand Down
11 changes: 9 additions & 2 deletions ferenda/sources/legal/se/kkv.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,14 @@ def extract_head(self, fp, basefile):
def infer_identifier(self, basefile):
return self.identifiers[basefile]

lblmap = {"Domstol:": "dcterms:publisher", # really :creator (KKV is the :publisher) but swedishlegalsource.space.ttl isn't written like that...
lblmap = {"Domstol:": "rinfoex:domstol", # this ad-hoc predicate
# keeps
# attributes_to_resource
# from converting the
# string into a URI,
# which we'd like to
# avoid for now
"Instans:": "rinfoex:instanstyp",
"Målnummer:": "rpubl:malnummer",
"Ärendemening:": "dcterms:title",
"Beslutsdatum:": "rpubl:avgorandedatum",
Expand All @@ -141,7 +148,7 @@ def extract_metadata(self, rawhead, basefile):
assert lbl.endswith(":"), "invalid label %s" % lbl
d[self.lblmap[lbl]] = value
d["dcterms:issued"] = d["rpubl:avgorandedatum"]
self.identifiers[basefile] = "%ss dom den %s i mål %s" % (d["dcterms:publisher"],
self.identifiers[basefile] = "%ss dom den %s i mål %s" % (d["rinfoex:domstol"],
d["rpubl:avgorandedatum"],
d["rpubl:malnummer"])
return d
Expand Down

0 comments on commit 9c93fed

Please sign in to comment.