/
frir.py
107 lines (86 loc) · 3.67 KB
/
frir.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
from builtins import str
import sadi
import rdflib
import setlr
from datetime import datetime
from depot.manager import DepotManager
from .update_change_service import UpdateChangeService
from nanopub import Nanopublication
from datastore import create_id
import flask
from flask import render_template
from flask import render_template_string
import logging
import sys, traceback
import database
import tempfile
from depot.io.interfaces import StoredFile
from whyis.namespace import *
import hashlib
from uuid import uuid4
from rdflib import compare
# Namespace for FRBR expression URIs. FRIRArchiver.process_nanopub appends the
# hexadecimal form of an expression digest to this prefix.
pexp = rdflib.Namespace('tag:tw.rpi.edu,2011:expression_rgda1-sha256-')
# Namespace for FRBR manifestation URIs. FRIRArchiver.process_nanopub appends
# the hexadecimal form of the stored file's manifestation digest to this prefix.
pmanif = rdflib.Namespace('tag:tw.rpi.edu,2011:manifestation_sha256-')
# Namespace object for the "nfo" vocabulary at semanticdesktop.org.
# NOTE(review): not referenced in the visible part of this module -- confirm
# whether other modules import it before removing.
nfo = rdflib.Namespace('http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#')
def rgda1_digest(graph):
    """Return the isomorphism-based digest of ``graph``.

    The triples of ``graph`` are first copied into a fresh ``rdflib.Graph``;
    the digest is then taken from the isomorphic form of that copy, so the
    caller's graph object is not the one handed to ``to_isomorphic``.

    :param graph: triple source accepted by ``rdflib.Graph.__iadd__``.
    :return: whatever ``graph_digest()`` yields for the isomorphic copy.
    """
    copy = rdflib.Graph()
    copy += graph
    return compare.to_isomorphic(copy).graph_digest()
def uuid(graph):
    """Return a freshly generated random UUID as an integer.

    ``graph`` is accepted but never read.
    NOTE(review): presumably the parameter exists so this function can be
    passed wherever a graph digest callable is expected -- confirm with callers.

    :param graph: ignored.
    :return: the integer value of a new ``uuid4()``.
    """
    return uuid4().int
def sha256(f):
    """Return the SHA-256 digest of a binary stream as an integer.

    The stream is consumed from its current position in 4096-byte reads until
    ``read`` returns ``b""``.

    :param f: object exposing ``read(size)`` that returns bytes.
    :return: the hexadecimal digest interpreted as a base-16 integer.
    """
    hasher = hashlib.sha256()
    while True:
        block = f.read(4096)
        # Same stop condition as iter(callable, b""): equality with b"".
        if block == b"":
            break
        hasher.update(block)
    return int(hasher.hexdigest(), 16)
class FRIRArchiver(UpdateChangeService):
    """Archive nanopublications as N-Quads files and describe them with FRBR.

    For each selected nanopublication the agent serializes it to N-Quads,
    stores the file in the application's nanopub depot, and records
    work / expression / manifestation statements about it in the output graph.
    """

    activity_class = whyis.Archive

    def __init__(self, expression_digest=rgda1_digest, manifestation_digest=sha256):
        """Configure the digest functions used to mint identifiers.

        :param expression_digest: callable taking a graph and returning an
            integer; applied to the assertion, provenance and pubinfo graphs.
        :param manifestation_digest: callable taking a readable binary file
            object and returning an integer; applied to the stored file.
        """
        self.expression_digest = expression_digest
        self.manifestation_digest = manifestation_digest

    def getInputClass(self):
        """Return the RDF class of resources this agent consumes."""
        return np.Nanopublication

    def getOutputClass(self):
        """Return the RDF class this agent assigns to processed resources."""
        return whyis.ArchivedNanopublication

    def get_query(self):
        """Return the SPARQL query selecting nanopublications to archive.

        It matches every ``np:Nanopublication`` that is typed neither
        ``whyis:FRIRNanopublication`` nor ``whyis:ArchivedNanopublication``.
        """
        return '''
prefix setl: <http://purl.org/twc/vocab/setl/>
select distinct ?resource where {
?resource a np:Nanopublication.
filter not exists {
?resource a whyis:FRIRNanopublication.
}
filter not exists {
?resource a whyis:ArchivedNanopublication.
}
}'''

    def process_nanopub(self, i, o, new_np):
        """Archive nanopublication ``i`` and describe the archive on ``o``.

        :param i: input resource; ``i.identifier`` names the nanopublication
            and ``i.graph.store`` is the store it is loaded from.
        :param o: output resource; receives the ``whyis:ArchivedNanopublication``
            type and, through ``o.graph``, the FRBR and file statements.
        :param new_np: nanopublication being produced; its pubinfo graph is
            marked with ``whyis:FRIRNanopublication``.
        """
        media_type = "application/n-quads"
        nanopub = Nanopublication(i.graph.store, i.identifier)

        # Graph.serialize() returns bytes on older rdflib releases but str on
        # rdflib >= 6 when no encoding is requested. The depot stores bytes,
        # so normalise here to work with either.
        quads = nanopub.serialize(format="nquads")
        if not isinstance(quads, bytes):
            quads = quads.encode("utf-8")

        # The file is named after the last path segment of the nanopub URI.
        filename = i.identifier.split('/')[-1] + '.nq'
        fileid = self.app.nanopub_depot.create(quads, filename, media_type)

        o.add(rdflib.RDF.type, whyis.ArchivedNanopublication)
        new_np.pubinfo.add((new_np.identifier, rdflib.RDF.type, whyis.FRIRNanopublication))

        # One expression digest per named graph of the nanopublication ...
        parts = [nanopub.assertion, nanopub.provenance, nanopub.pubinfo]
        expressions = {part.identifier: self.expression_digest(part) for part in parts}
        # ... and the nanopublication's own digest is the sum of its parts'.
        expressions[nanopub.identifier] = sum(expressions.values())
        # hex() yields "0x..."; the slice drops that prefix.
        nanopub_expression_uri = pexp[hex(expressions[nanopub.identifier])[2:]]

        for work, expression in expressions.items():
            exp = pexp[hex(expression)[2:]]
            o.graph.add((work, frbr.realization, exp))
            o.graph.add((work, rdflib.RDF.type, frbr.Work))
            o.graph.add((exp, rdflib.RDF.type, frbr.Expression))

        # Digest the bytes as stored, read back from the depot.
        with self.app.nanopub_depot.get(fileid) as stored_file:
            manifestation_id = self.manifestation_digest(stored_file)
        manifestation = pmanif[hex(manifestation_id)[2:]]
        media_type_uri = NS.mediaTypes[media_type]

        o.graph.add((nanopub_expression_uri, frbr.embodiment, manifestation))
        o.graph.add((manifestation, rdflib.RDF.type, pv.File))
        o.graph.add((manifestation, whyis.hasFileID, rdflib.Literal(fileid)))
        o.graph.add((manifestation, dc.created, rdflib.Literal(datetime.utcnow())))
        o.graph.add((manifestation, NS.ov.hasContentType, rdflib.Literal(media_type)))
        o.graph.add((manifestation, rdflib.RDF.type, media_type_uri))
        o.graph.add((media_type_uri, rdflib.RDF.type, dc.FileFormat))