-
-
Notifications
You must be signed in to change notification settings - Fork 13
/
persistent_sorted_set.clj
174 lines (138 loc) · 6.29 KB
/
persistent_sorted_set.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
(ns ^{:author "Nikita Prokopov"
:doc "A B-tree based persistent sorted set. Supports transients, custom comparators, fast iteration, efficient slices (iterator over a part of the set) and reverse slices. Almost a drop-in replacement for [[clojure.core/sorted-set]], the only difference being this one can’t store nil."}
me.tonsky.persistent-sorted-set
(:refer-clojure :exclude [conj disj sorted-set sorted-set-by])
(:require
[me.tonsky.persistent-sorted-set.arrays :as arrays])
(:import
[clojure.lang RT]
[java.lang.ref SoftReference]
[java.util Comparator Arrays]
[java.util.function BiConsumer]
[me.tonsky.persistent_sorted_set ANode ArrayUtil Branch IStorage Leaf PersistentSortedSet Seq]))
(set! *warn-on-reflection* true)
(defn conj
"Analogue to [[clojure.core/conj]] but with comparator that overrides the one stored in set."
[^PersistentSortedSet set key ^Comparator cmp]
(.cons set key cmp))
(defn disj
"Analogue to [[clojure.core/disj]] with comparator that overrides the one stored in set."
[^PersistentSortedSet set key ^Comparator cmp]
(.disjoin set key cmp))
(defn slice
"An iterator for part of the set with provided boundaries.
`(slice set from to)` returns iterator for all Xs where from <= X <= to.
`(slice set from nil)` returns iterator for all Xs where X >= from.
Optionally pass in comparator that will override the one that set uses. Supports efficient [[clojure.core/rseq]]."
([^PersistentSortedSet set from to]
(.slice set from to))
([^PersistentSortedSet set from to ^Comparator cmp]
(.slice set from to cmp)))
(defn rslice
"A reverse iterator for part of the set with provided boundaries.
`(rslice set from to)` returns backwards iterator for all Xs where from <= X <= to.
`(rslice set from nil)` returns backwards iterator for all Xs where X <= from.
Optionally pass in comparator that will override the one that set uses. Supports efficient [[clojure.core/rseq]]."
([^PersistentSortedSet set from to]
(.rslice set from to))
([^PersistentSortedSet set from to ^Comparator cmp]
(.rslice set from to cmp)))
(defn seek
"An efficient way to seek to a specific key in a seq (either returned by [[clojure.core.seq]] or a slice.)
`(seek (seq set) to)` returns iterator for all Xs where to <= X.
Optionally pass in comparator that will override the one that set uses."
([seq to]
(.seek ^Seq seq to))
([seq to cmp]
(.seek ^Seq seq to ^Comparator cmp)))
(defn- array-from-indexed [coll type from to]
(cond
(instance? clojure.lang.Indexed coll)
(ArrayUtil/indexedToArray type coll from to)
(arrays/array? coll)
(Arrays/copyOfRange coll from to (arrays/array-type type))))
(defn- split
([coll to type avg max]
(persistent! (split (transient []) 0 coll to type avg max)))
([res from coll to type avg max]
(let [len (- to from)]
(cond
(== 0 len)
res
(>= len (* 2 avg))
(recur (conj! res (array-from-indexed coll type from (+ from avg))) (+ from avg) coll to type avg max)
(<= len max)
(conj! res (array-from-indexed coll type from to))
:else
(-> res
(conj! (array-from-indexed coll type from (+ from (quot len 2))))
(conj! (array-from-indexed coll type (+ from (quot len 2)) to)))))))
(defn from-sorted-array
"Fast path to create a set if you already have a sorted array of elements on your hands."
([^Comparator cmp keys]
(from-sorted-array cmp keys (arrays/alength keys)))
([^Comparator cmp keys len]
(let [max PersistentSortedSet/MAX_LEN
avg (quot (+ PersistentSortedSet/MIN_LEN max) 2)
storage nil
edit nil
->Leaf (fn [keys]
(Leaf. (count keys) keys edit))
->Branch (fn [level ^objects children]
(Branch.
level
(count children)
^objects (arrays/amap #(.maxKey ^ANode %) Object children)
nil
children
edit))]
(loop [level 1
nodes (mapv ->Leaf (split keys len Object avg max))]
(case (count nodes)
0 (PersistentSortedSet. cmp)
1 (PersistentSortedSet. {} cmp nil storage (first nodes) len edit 0)
(recur (inc level) (mapv #(->Branch level %) (split nodes (count nodes) Object avg max))))))))
(defn from-sequential
"Create a set with custom comparator and a collection of keys. Useful when you don’t want to call [[clojure.core/apply]] on [[sorted-set-by]]."
[^Comparator cmp keys]
(let [arr (to-array keys)
_ (arrays/asort arr cmp)
len (ArrayUtil/distinct cmp arr)]
(from-sorted-array cmp arr len)))
(defn sorted-set-by
"Create a set with custom comparator."
([cmp] (PersistentSortedSet. ^Comparator cmp))
([cmp & keys] (from-sequential cmp keys)))
(defn sorted-set
"Create a set with default comparator."
([] (PersistentSortedSet/EMPTY))
([& keys] (from-sequential compare keys)))
(defn restore-by
"Constructs lazily-loaded set from storage, root address and custom comparator.
Supports all operations that normal in-memory impl would,
will fetch missing nodes by calling IStorage::restore when needed"
[cmp address ^IStorage storage]
(PersistentSortedSet. nil cmp address storage nil -1 nil 0))
(defn restore
"Constructs lazily-loaded set from storage and root address.
Supports all operations that normal in-memory impl would,
will fetch missing nodes by calling IStorage::restore when needed"
[address ^IStorage storage]
(restore-by RT/DEFAULT_COMPARATOR address storage))
(defn walk-addresses
"Visit each address used by this set. Usable for cleaning up
garbage left in storage from previous versions of the set"
[^PersistentSortedSet set consume-fn]
(.walkAddresses set consume-fn))
(defn store
"Store each not-yet-stored node by calling IStorage::store and remembering
returned address. Incremental, won’t store same node twice on subsequent calls.
Returns root address. Remember it and use it for restore"
([^PersistentSortedSet set]
(.store set))
([^PersistentSortedSet set ^IStorage storage]
(.store set storage)))
(defn set-branching-factor!
"Global -- applies to all sets. Must be power of 2. Defaults to 64"
[n]
(PersistentSortedSet/setMaxLen n))