forked from threatgrid/naga
-
Notifications
You must be signed in to change notification settings - Fork 2
/
data.clj
226 lines (182 loc) · 7.15 KB
/
data.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
(ns ^{:doc "Converts external data into a graph format (triples)."
:author "Paula Gearon"}
naga.data
(:require [schema.core :as s :refer [=>]]
[clojure.java.io :as io]
[clojure.string :as str]
[cheshire.core :as j]
[naga.store :as store :refer [Storage]])
(:import [java.util Map List]))
(def ^:dynamic *current-storage* nil)
(def Triple [s/Any s/Keyword s/Any])
(defn get-naga-first
"Finds the naga/first property in a map, and gets the value."
[struct]
(let [first-pair? (fn [[k v :as p]]
(and (= "naga" (namespace k))
(str/starts-with? (name k) "first")
p))]
(some first-pair? struct)))
(s/defn containership-triples
"Finds the list of entity nodes referred to in a list and builds
triples describing a flat 'contains' property"
[node :- s/Any
triples :- [Triple]]
(let [listmap (->> (group-by first triples)
(map (fn [[k vs]]
[k (into {} (map #(apply vector (rest %)) vs))]))
(into {}))
node-list (loop [nl [] n node]
(if-not n
nl
(let [{r :naga/rest :as lm} (listmap n)
[_ f] (get-naga-first lm)]
(recur (conj nl f) r))))]
(map
(fn [n] [node (store/container-property *current-storage* n) n])
node-list)))
(defmulti value-triples
"Converts a value into a list of triples.
Return the entity ID of the data coupled with the sequence of triples.
NOTE: This may need to be dispatched to storage.
e.g. Datomic could use properties to determine how to encode data."
type)
(declare map->triples)
(s/defn list-triples
"Creates the triples for a list"
[[v & vs :as vlist]]
(if (seq vlist)
(let [id (store/new-node *current-storage*)
[value-id triples] (value-triples v)
[next-id next-triples] (list-triples vs)]
[id (concat [[id (store/data-property *current-storage* value-id) value-id]]
(when next-id [[id :naga/rest next-id]])
triples
next-triples)])))
(defmethod value-triples List
[vlist]
(let [[node triples :as raw-result] (list-triples vlist)]
(if triples
[node (concat triples (containership-triples node triples))]
raw-result)))
(defmethod value-triples Map [v] (map->triples v))
(defmethod value-triples nil [v] nil)
(defmethod value-triples :default [v] [v nil])
(s/defn property-vals :- [Triple]
"Takes a property-value pair associated with an entity,
and builds triples around it"
[entity-id :- s/Any
[property value] :- [s/Keyword s/Any]]
(if-let [[value-id value-data] (value-triples value)]
(cons [entity-id property value-id] value-data)))
(s/defn map->triples :- [s/Any [Triple]]
"Converts a single map to triples. Returns a pair of the map's ID and the triples for the map."
[data :- {s/Keyword s/Any}]
(let [entity-id (or (:db/id data) (store/new-node *current-storage*))
triples-data (mapcat (partial property-vals entity-id)
data)]
[entity-id triples-data]))
(s/defn name-for
"Convert an id (probably a number) to a keyword for identification"
[id :- s/Any]
(if (keyword? id)
id
(store/node-label *current-storage* id)))
(s/defn ident-map->triples :- [s/Any [Triple]]
"Converts a single map to triples for an ID'ed map"
[j]
(let [[id triples] (map->triples j)]
(if (:db/ident j)
triples
(concat [[id :db/ident (name-for id)] [id :naga/entity true]] triples))))
(s/defn json->triples :- [Triple]
"Converts parsed JSON into a sequence of triples for a provided storage."
[storage j]
(binding [*current-storage* storage]
(doall (mapcat ident-map->triples j))))
(s/defn stream->triples :- [Triple]
"Converts a stream to triples relevant to a store"
[storage io]
(with-open [r (io/reader io)]
(let [data (j/parse-stream r true)]
(json->triples storage data))))
(s/defn string->triples :- [Triple]
"Converts a string to triples relevant to a store"
[storage :- Storage
s :- s/Str]
(json->triples storage (j/parse-string s true)))
;; extracting from the store
(s/defn property-values :- [[s/Keyword s/Any]]
"Return all the property/value pairs for a given entity in the store."
[store :- Storage
entity :- s/Any]
(store/resolve-pattern store [entity '?p '?o]))
(s/defn check-structure :- (s/maybe [[s/Keyword s/Any]])
"Determines if a value represents a structure. If so, return the property/values for it.
Otherwise, return nil."
[store :- Storage
prop :- s/Any
v :- s/Any]
(if (and (not (#{:db/ident :db/id} prop)) (store/node-type? store prop v))
(let [data (property-values store v)]
(seq data))))
(declare pairs->json recurse-node)
(s/defn build-list :- [s/Any]
"Takes property/value pairs and if they represent a list node, returns the list.
else, nil."
[store :- Storage
pairs :- [[s/Keyword s/Any]]]
;; convert the data to a map
(let [st (into {} pairs)]
;; if the properties indicate a list, then process it
(when-let [first-prop-elt (get-naga-first st)]
(let [remaining (:naga/rest st)
[_ first-elt] (recurse-node store first-prop-elt)]
(assert first-elt)
;; recursively build the list
(if remaining
(cons first-elt (build-list store (property-values store remaining)))
(list first-elt))))))
(s/defn recurse-node :- s/Any
"Determines if the val of a map entry is a node to be recursed on, and loads if necessary"
[store :- Storage
[prop v :as prop-val] :- [s/Keyword s/Any]]
(if-let [pairs (check-structure store prop v)]
[prop (or (build-list store pairs)
(pairs->json store pairs))]
prop-val))
(s/defn pairs->json :- {s/Keyword s/Any}
"Uses a set of property-value pairs to load up a nested data structure from the graph"
[store :- Storage
prop-vals :- [[s/Keyword s/Any]]]
(dissoc
(->> prop-vals
(map (partial recurse-node store))
(into {}))
:db/id
:db/ident
:naga/entity))
(s/defn id->json :- {s/Keyword s/Any}
"Uses an id node to load up a nested data structure from the graph"
[store :- Storage
entity-id :- s/Any]
(pairs->json store (property-values store entity-id)))
(s/defn ident->json :- {s/Keyword s/Any}
"Converts data in a database to data structures suitable for JSON encoding"
[store :- Storage
ident :- s/Any]
;; find the entity by its ident. Some systems will make the id the entity id,
;; and the ident will be separate, so look for both.
(let [eid (or (ffirst (store/resolve-pattern store '[?eid :db/id ident]))
(ffirst (store/resolve-pattern store '[?eid :db/ident ident])))]
(id->json store eid)))
(s/defn store->json :- [{s/Keyword s/Any}]
"Pulls all top level JSON out of a store"
[store :- Storage]
(->> (store/query store '[?e] '[[?e :naga/entity true] [?e :db/ident ?id]])
(map first)
(map (partial id->json store))))
(s/defn store->str :- s/Str
"Reads a store into JSON strings"
[store :- Storage]
(j/generate-string (store->json store)))