-
Notifications
You must be signed in to change notification settings - Fork 153
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2244 from art-w/out-of-order
- Loading branch information
Showing
8 changed files
with
219 additions
and
76 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
(* | ||
* Copyright (c) 2022-2023 Tarides <contact@tarides.com> | ||
* | ||
* Permission to use, copy, modify, and distribute this software for any | ||
* purpose with or without fee is hereby granted, provided that the above | ||
* copyright notice and this permission notice appear in all copies. | ||
* | ||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
*) | ||
|
||
open! Import | ||
|
||
(** A range starting at offset [off] and spanning [len] units, i.e. ending at
    [off + len]. *)
type range = {
  off : int63;  (** start offset of the range *)
  len : int63;  (** length of the range *)
}
|
||
module Stack = struct
  (** A stack of ranges stored as a chain of fixed-size chunks. Each range
      occupies two consecutive cells of [arr]: its offset, then its length.
      [len] is the number of cells currently used in the chunk and [prev] is
      the chunk below it. *)
  type t = Empty | Stack of { mutable len : int; arr : int63 array; prev : t }

  (* = 128*1024, a large but not too large chunk size *)
  let capacity = 131_072

  (** [make prev] allocates a fresh, unused chunk on top of [prev]. *)
  let make prev =
    let arr = Array.make capacity Int63.zero in
    Stack { len = 0; arr; prev }

  (** [is_full t] is [true] when the top chunk of [t] has used all of its
      cells. [Empty] is reported as full, so that pushing onto it allocates
      a first chunk. *)
  let is_full t =
    match t with Empty -> true | Stack { len; _ } -> len >= capacity

  (** [push x t] stores the range [x] on top of [t] and returns the resulting
      stack: [t] itself (mutated in place) when its top chunk has room, or a
      newly allocated chunk linked to [t] otherwise. *)
  let rec push x t =
    match t with
    | Empty -> push x (make t)
    | Stack chunk ->
        if is_full t then push x (make t)
        else begin
          let pos = chunk.len in
          chunk.len <- pos + 2;
          chunk.arr.(pos) <- x.off;
          chunk.arr.(pos + 1) <- x.len;
          t
        end

  (** [to_seq t] enumerates the ranges of [t] from the most recently pushed
      to the oldest, walking each chunk backwards before moving on to its
      [prev] chunk. *)
  let rec to_seq t () =
    match t with
    | Empty -> Seq.Nil
    | Stack { len; arr; prev } ->
        (* ranges are written two cells at a time *)
        assert (len mod 2 = 0);
        (* [pos] is the cell holding the offset of the next range to yield *)
        let rec from pos () =
          if pos < 0 then to_seq prev ()
          else
            let range = { off = arr.(pos); len = arr.(pos + 1) } in
            Seq.Cons (range, from (pos - 2))
        in
        from (len - 2) ()
end
|
||
(** Mutable accumulator for a set of ranges. *)
type t = {
  mutable last : range option;
      (** the latest in-order range, still open to fusion with the next
          [add]; [None] until the first [add] *)
  mutable ranges : Stack.t;
      (** the in-order ranges that preceded [last] *)
  mutable count : int;  (** number of calls to [add] *)
  mutable out_of_order : range list;
      (** ranges that were not strictly below [last] when added *)
}
|
||
(** [make ()] creates an empty set: no pending range, an empty stack, a zero
    [add] counter and no out-of-order range. *)
let make () =
  let last = None
  and ranges = Stack.Empty
  and count = 0
  and out_of_order = [] in
  { last; ranges; count; out_of_order }
|
||
(** [count t] is the number of times [add] was called on [t]. *)
let count { count; _ } = count
|
||
(** [add ~off ~len t] records the range [(off, len)] in [t] and increments the
    [add] counter.

    The new range is compared with the latest in-order range [t.last]:
    - if it ends exactly where [t.last] starts, both are fused into one range;
    - if it ends strictly before [t.last] starts, [t.last] is pushed onto the
      stack and the new range becomes [t.last];
    - otherwise it is set aside in [t.out_of_order]. *)
let add ~off ~len t =
  t.count <- t.count + 1;
  let len = Int63.of_int len in
  let added = { off; len } in
  match t.last with
  | None -> t.last <- Some added
  | Some previous ->
      let open Int63.Syntax in
      let stop = off + len in
      if stop = previous.off then
        (* latest interval can be fused with the previous one *)
        t.last <- Some { off; len = len + previous.len }
      else if stop < previous.off then begin
        (* disjoint and strictly smaller *)
        t.last <- Some added;
        t.ranges <- Stack.push previous t.ranges
      end
      else
        (* latest range is not strictly smaller than previous,
         * this is only expected on legacy data with wrong object ordering
         * and is handled as a special case. *)
        t.out_of_order <- added :: t.out_of_order
|
||
(** [ranges_to_seq t] enumerates the in-order ranges of [t]: the pending
    range [t.last] first, then the stacked ranges from the most recently
    pushed to the oldest. [t.last] is read when the sequence is forced. *)
let ranges_to_seq t () =
  match t.last with
  | Some head -> Seq.Cons (head, Stack.to_seq t.ranges)
  | None -> Seq.Nil
|
||
(** [out_of_order_to_seq t] enumerates the out-of-order ranges of [t] sorted
    by increasing offset. Ranges are compared on their offset only, so when
    several entries share an offset a single one of them is kept. *)
let out_of_order_to_seq t =
  let by_offset a b = Int63.compare a.off b.off in
  t.out_of_order |> List.sort_uniq by_offset |> List.to_seq
|
||
(** [seq_merge xs ys] merges two sequences of ranges, each sorted by
    increasing offset, into one sequence sorted by increasing offset. When
    both sequences hold a range at the same offset, only one copy is emitted
    and the two lengths are asserted to be equal.

    The node of the sequence that was not advanced is carried over to the
    next step instead of being forced again: [Seq] thunks are not memoized,
    so re-forcing would recompute that node at every step. *)
let seq_merge xs ys =
  (* [merge xn yn] merges two already-forced nodes. *)
  let rec merge xn yn =
    match (xn, yn) with
    | Seq.Nil, rest | rest, Seq.Nil -> rest
    | Seq.Cons (x, xs'), Seq.Cons (y, ys') ->
        let c = Int63.compare x.off y.off in
        if c < 0 then Seq.Cons (x, fun () -> merge (xs' ()) yn)
        else if c > 0 then Seq.Cons (y, fun () -> merge xn (ys' ()))
        else begin
          (* same offset on both sides: expected to be the same range *)
          assert (Int63.compare x.len y.len = 0);
          Seq.Cons (x, fun () -> merge (xs' ()) (ys' ()))
        end
  in
  fun () -> merge (xs ()) (ys ())
|
||
(** Outcome of combining two ranges with [fuse]. *)
type fused =
  | Disjoint of range * range
      (** the two ranges are separated by a gap; the lower one comes first *)
  | Overlap of range
      (** the two ranges overlap or touch; the payload covers both *)
|
||
(** [fuse fst snd] combines two ranges given in any order. It returns
    [Disjoint (lo, hi)] when one range ends strictly before the other starts,
    and otherwise [Overlap r] where [r] spans from the smallest start to the
    largest end. Ranges that merely touch (one ends exactly where the other
    starts) are therefore reported as [Overlap]. *)
let fuse fst snd =
  let open Int63.Syntax in
  let stop_of r = r.off + r.len in
  let fst_stop = stop_of fst and snd_stop = stop_of snd in
  if fst_stop < snd.off then Disjoint (fst, snd)
  else if snd_stop < fst.off then Disjoint (snd, fst)
  else
    let off = min fst.off snd.off in
    let stop = max fst_stop snd_stop in
    Overlap { off; len = stop - off }
|
||
(** [seq_fuse ?prev s] walks [s] and coalesces neighbouring ranges that [fuse]
    reports as overlapping. [prev] is the range accumulated so far and not
    yet emitted; it is emitted once the next range turns out to be disjoint
    from it, or when [s] is exhausted. As used by [iter], [s] is sorted by
    increasing offset. *)
let rec seq_fuse ?prev s () =
  match s () with
  | Seq.Nil -> (
      match prev with
      | None -> Seq.Nil
      | Some pending -> Seq.Cons (pending, Seq.empty))
  | Seq.Cons (x, rest) -> (
      match prev with
      | None -> seq_fuse ~prev:x rest ()
      | Some pending -> (
          match fuse x pending with
          | Disjoint (lo, hi) -> Seq.Cons (lo, seq_fuse ~prev:hi rest)
          | Overlap merged -> seq_fuse ~prev:merged rest ()))
|
||
(** [iter fn t] calls [fn ~off ~len] on each range of [t]. When no range was
    added out of order, the in-order ranges are visited directly; otherwise
    they are first merged with the sorted out-of-order ranges and overlapping
    or touching neighbours are fused. *)
let iter fn t =
  let visit { off; len } = fn ~off ~len in
  let in_order = ranges_to_seq t in
  let all_ranges =
    match t.out_of_order with
    | _ :: _ -> seq_fuse (seq_merge in_order (out_of_order_to_seq t))
    | [] -> in_order
  in
  Seq.iter visit all_ranges
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
(* | ||
* Copyright (c) 2022-2023 Tarides <contact@tarides.com> | ||
* | ||
* Permission to use, copy, modify, and distribute this software for any | ||
* purpose with or without fee is hereby granted, provided that the above | ||
* copyright notice and this permission notice appear in all copies. | ||
* | ||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
*) | ||
|
||
open! Import | ||
|
||
type t | ||
(** An ordered set of disjoint [(offset, length)] ranges. *) | ||
|
||
val make : unit -> t | ||
(** [make ()] returns a new empty set of ranges. *) | ||
|
||
val add : off:int63 -> len:int -> t -> unit | ||
(** [add ~off ~len t] inserts the range [(off, len)] into [t]. Successive calls | ||
to [add] are optimized for strictly decreasing offset arguments; ranges | ||
added in any other order are accepted and reconciled by [iter]. *) | ||
|
||
val iter : (off:int63 -> len:int63 -> unit) -> t -> unit | ||
(** [iter fn t] calls [fn ~off ~len] on every disjoint range [(off, len)] in the | ||
set [t]. The function [fn ~off ~len] is called with strictly increasing | ||
offsets. If two or more adjacent ranges [(off,len)] and [(off+len,len')] | ||
were added to the set [t], a single call to [fn] is performed on the | ||
enclosing interval [(off,len+len')]. *) | ||
|
||
val count : t -> int | ||
(** [count t] returns the number of [add]s performed on [t]. *) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,7 @@ | |
test_upgrade | ||
test_gc | ||
test_flush_reload | ||
test_ranges | ||
test_mapping | ||
test_nearest_geq | ||
test_dispatcher | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
(* | ||
* Copyright (c) 2018-2022 Tarides <contact@tarides.com> | ||
* | ||
* Permission to use, copy, modify, and distribute this software for any | ||
* purpose with or without fee is hereby granted, provided that the above | ||
* copyright notice and this permission notice appear in all copies. | ||
* | ||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
*) | ||
|
||
open! Import | ||
module Int63 = Optint.Int63 | ||
module Ranges = Irmin_pack_unix.Ranges | ||
|
||
(* Adds ranges in mostly decreasing offset order, with a few entries out of
   order, then checks the fused ranges reported by [Ranges.iter]. *)
let test () =
  let ranges = Ranges.make () in
  let insert (off, len) = Ranges.add ~off:(Int63.of_int off) ~len ranges in
  List.iter insert
    [ (90, 10); (80, 5); (70, 10); (87, 1); (60, 5); (50, 5); (65, 2); (55, 5) ];
  let collected = ref [] in
  let record ~off ~len =
    collected := (Int63.to_int off, Int63.to_int len) :: !collected
  in
  Ranges.iter record ranges;
  (* [collected] is built by consing, so it holds the visited ranges in
     reverse visiting order. *)
  let expected = [ (90, 10); (87, 1); (70, 15); (50, 17) ] in
  Alcotest.(check (list (pair int int))) "out of order" expected !collected;
  Lwt.return_unit
|
||
(* Test cases exported by this module. *)
let tests =
  let run _switch () = test () in
  [ Alcotest_lwt.test_case "test ranges" `Quick run ]