forked from cayleygraph/cayley
/
hasa.go
289 lines (257 loc) · 8.49 KB
/
hasa.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package iterator
// Defines one of the base iterators, the HasA iterator. The HasA takes a
// subiterator of links, and acts as an iterator of nodes in the given
// direction. The name comes from the idea that a "link HasA subject" or a "link
// HasA predicate".
//
// HasA is weird in that it may return the same value twice if on the Next()
// path. That's okay -- in reality, it can be viewed as returning the value for
// a new quad, but to make logic much simpler, here we have the HasA.
//
// Likewise, it's important to think about Contains()ing a HasA. When given a
// value to check, it means "Check all predicates that have this value for your
// direction against the subiterator." This would imply that there's more than
// one possibility for the same Contains()ed value. While we could return the
// number of options, it's simpler to return one, and then call NextPath()
// enough times to enumerate the options. (In fact, one could argue that the
// raison d'etre for NextPath() is this iterator).
//
// Alternatively, can be seen as the dual of the LinksTo iterator.
import (
"github.com/cayleygraph/cayley/clog"
"github.com/cayleygraph/cayley/graph"
"github.com/cayleygraph/cayley/quad"
)
// A HasA consists of a reference back to the graph.QuadStore that it references,
// a primary subiterator, a direction in which the quads for that subiterator point,
// and a temporary holder for the iterator generated on Contains().
type HasA struct {
uid uint64
tags graph.Tagger
qs graph.QuadStore
primaryIt graph.Iterator
dir quad.Direction
resultIt graph.Iterator
result graph.Value
runstats graph.IteratorStats
err error
}
// Construct a new HasA iterator, given the quad subiterator, and the quad
// direction for which it stands.
func NewHasA(qs graph.QuadStore, subIt graph.Iterator, d quad.Direction) *HasA {
return &HasA{
uid: NextUID(),
qs: qs,
primaryIt: subIt,
dir: d,
}
}
func (it *HasA) UID() uint64 {
return it.uid
}
// Return our sole subiterator.
func (it *HasA) SubIterators() []graph.Iterator {
return []graph.Iterator{it.primaryIt}
}
func (it *HasA) Reset() {
it.primaryIt.Reset()
if it.resultIt != nil {
it.resultIt.Close()
}
}
func (it *HasA) Tagger() *graph.Tagger {
return &it.tags
}
func (it *HasA) Clone() graph.Iterator {
out := NewHasA(it.qs, it.primaryIt.Clone(), it.dir)
out.tags.CopyFrom(it)
return out
}
// Direction accessor.
func (it *HasA) Direction() quad.Direction { return it.dir }
// Pass the Optimize() call along to the subiterator. If it becomes Null,
// then the HasA becomes Null (there are no quads that have any directions).
func (it *HasA) Optimize() (graph.Iterator, bool) {
newPrimary, changed := it.primaryIt.Optimize()
if changed {
it.primaryIt = newPrimary
if it.primaryIt.Type() == graph.Null {
return it.primaryIt, true
}
}
// Ask the graph.QuadStore if we can be replaced. Often times, this is a great
// optimization opportunity (there's a fixed iterator underneath us, for
// example).
newReplacement, hasOne := it.qs.OptimizeIterator(it)
if hasOne {
it.Close()
return newReplacement, true
}
return it, false
}
// Pass the TagResults down the chain.
func (it *HasA) TagResults(dst map[string]graph.Value) {
for _, tag := range it.tags.Tags() {
dst[tag] = it.Result()
}
for tag, value := range it.tags.Fixed() {
dst[tag] = value
}
it.primaryIt.TagResults(dst)
}
func (it *HasA) Describe() graph.Description {
primary := it.primaryIt.Describe()
return graph.Description{
UID: it.UID(),
Type: it.Type(),
Tags: it.tags.Tags(),
Direction: it.dir,
Iterator: &primary,
}
}
// Check a value against our internal iterator. In order to do this, we must first open a new
// iterator of "quads that have `val` in our direction", given to us by the quad store,
// and then Next() values out of that iterator and Contains() them against our subiterator.
func (it *HasA) Contains(val graph.Value) bool {
graph.ContainsLogIn(it, val)
it.runstats.Contains += 1
if clog.V(4) {
clog.Infof("Id is %v", it.qs.NameOf(val))
}
// TODO(barakmich): Optimize this
if it.resultIt != nil {
it.resultIt.Close()
}
it.resultIt = it.qs.QuadIterator(it.dir, val)
ok := it.NextContains()
if it.err != nil {
return false
}
return graph.ContainsLogOut(it, val, ok)
}
// NextContains() is shared code between Contains() and GetNextResult() -- calls next on the
// result iterator (a quad iterator based on the last checked value) and returns true if
// another match is made.
func (it *HasA) NextContains() bool {
for it.resultIt.Next() {
it.runstats.ContainsNext += 1
link := it.resultIt.Result()
if clog.V(4) {
clog.Infof("Quad is %v", it.qs.Quad(link))
}
if it.primaryIt.Contains(link) {
it.result = it.qs.QuadDirection(link, it.dir)
return true
}
}
it.err = it.resultIt.Err()
return false
}
// Get the next result that matches this branch.
func (it *HasA) NextPath() bool {
// Order here is important. If the subiterator has a NextPath, then we
// need do nothing -- there is a next result, and we shouldn't move forward.
// However, we then need to get the next result from our last Contains().
//
// The upshot is, the end of NextPath() bubbles up from the bottom of the
// iterator tree up, and we need to respect that.
if clog.V(4) {
clog.Infof("HASA %v NextPath", it.UID())
}
if it.primaryIt.NextPath() {
return true
}
it.err = it.primaryIt.Err()
if it.err != nil {
return false
}
result := it.NextContains() // Sets it.err if there's an error
if it.err != nil {
return false
}
if clog.V(4) {
clog.Infof("HASA %v NextPath Returns %v", it.UID(), result)
}
return result
}
// Next advances the iterator. This is simpler than Contains. We have a
// subiterator we can get a value from, and we can take that resultant quad,
// pull our direction out of it, and return that.
func (it *HasA) Next() bool {
graph.NextLogIn(it)
it.runstats.Next += 1
if it.resultIt != nil {
it.resultIt.Close()
}
it.resultIt = &Null{}
if !it.primaryIt.Next() {
it.err = it.primaryIt.Err()
return graph.NextLogOut(it, false)
}
tID := it.primaryIt.Result()
val := it.qs.QuadDirection(tID, it.dir)
it.result = val
return graph.NextLogOut(it, true)
}
func (it *HasA) Err() error {
return it.err
}
func (it *HasA) Result() graph.Value {
return it.result
}
// GetStats() returns the statistics on the HasA iterator. This is curious. Next
// cost is easy, it's an extra call or so on top of the subiterator Next cost.
// ContainsCost involves going to the graph.QuadStore, iterating out values, and hoping
// one sticks -- potentially expensive, depending on fanout. Size, however, is
// potentially smaller. we know at worst it's the size of the subiterator, but
// if there are many repeated values, it could be much smaller in totality.
func (it *HasA) Stats() graph.IteratorStats {
subitStats := it.primaryIt.Stats()
// TODO(barakmich): These should really come from the quadstore itself
// and be optimized.
faninFactor := int64(1)
fanoutFactor := int64(30)
nextConstant := int64(2)
quadConstant := int64(1)
return graph.IteratorStats{
NextCost: quadConstant + subitStats.NextCost,
ContainsCost: (fanoutFactor * nextConstant) * subitStats.ContainsCost,
Size: faninFactor * subitStats.Size,
ExactSize: false,
Next: it.runstats.Next,
Contains: it.runstats.Contains,
ContainsNext: it.runstats.ContainsNext,
}
}
// Close the subiterator, the result iterator (if any) and the HasA. It closes
// all subiterators it can, but returns the first error it encounters.
func (it *HasA) Close() error {
err := it.primaryIt.Close()
if it.resultIt != nil {
_err := it.resultIt.Close()
if err == nil {
err = _err
}
}
return err
}
// Register this iterator as a HasA.
func (it *HasA) Type() graph.Type { return graph.HasA }
func (it *HasA) Size() (int64, bool) {
st := it.Stats()
return st.Size, st.ExactSize
}
var _ graph.Iterator = &HasA{}