Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Good gc #1944

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion bench/irmin-pack/trace_replay.ml
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,7 @@ module Make (Store : Store) = struct
let max_ncommits = config.number_of_commits_to_replay in
with_progress_bar ~message:"Replaying trace" ~n:max_ncommits ~unit:"commit"
@@ fun prog ->
Fmt.epr "\n%!";
let t =
{
contexts = Hashtbl.create 3;
Expand Down Expand Up @@ -429,7 +430,8 @@ module Make (Store : Store) = struct
|> Store.Backend.Commit.Key.to_hash)
in
t.commits_since_start_or_gc <- t.commits_since_start_or_gc + 1;
prog 1;
ignore prog;
(* prog 1; *)
aux commit_seq (i + 1)
in
aux commit_seq 0
Expand Down
13 changes: 12 additions & 1 deletion src/irmin-pack/layout.ml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,18 @@ module V3 = struct
(* [gc_result ~generation] is [toplevel] applied to the file name
   [store.<generation>.out]. *)
let gc_result ~generation =
  toplevel (Printf.sprintf "store.%d.out" generation)

(* TODO layered: Add prefix and mapping *)
(* [reachable ~generation] is [toplevel] applied to the file name
   [store.<generation>.reachable]. *)
let reachable ~generation =
  toplevel (Printf.sprintf "store.%d.reachable" generation)

(* [sorted ~generation] is [toplevel] applied to the file name
   [store.<generation>.sorted]. *)
let sorted ~generation =
  toplevel (Printf.sprintf "store.%d.sorted" generation)

(* [mapping ~generation] is [toplevel] applied to the file name
   [store.<generation>.mapping]. *)
let mapping ~generation =
  toplevel (Printf.sprintf "store.%d.mapping" generation)

(* [prefix ~generation] is [toplevel] applied to the file name
   [store.<generation>.prefix]. *)
let prefix ~generation =
  toplevel (Printf.sprintf "store.%d.prefix" generation)

(* [all ~generation ~root] lists, in this order, the suffix (for
   [generation]), branch, dict and control entries computed for [root]. The
   reachable, sorted, mapping, prefix and gc-result entries are not part of
   the list. *)
let all ~generation ~root =
  let suffix_entry = suffix ~generation ~root in
  let branch_entry = branch ~root in
  let dict_entry = dict ~root in
  let control_entry = control ~root in
  [ suffix_entry; branch_entry; dict_entry; control_entry ]
end
6 changes: 1 addition & 5 deletions src/irmin-pack/unix/control_file_intf.ml
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,7 @@ module Payload_v3 = struct
entries before that point may be v1 entries. V1 entries need an entry in
index because it is the only place their length is stored. *)

type from_v3_gced = {
(* TODO(good gc): Uncomment entry_offset_suffix_start *)
(* entry_offset_suffix_start : int63; *)
generation : int;
}
type from_v3_gced = { entry_offset_suffix_start : int63; generation : int }
[@@deriving irmin]
(** [entry_offset_suffix_start] is 0 if the suffix file was never garbage
collected. Otherwise it is the offset of the very first entry of the
Expand Down
255 changes: 255 additions & 0 deletions src/irmin-pack/unix/dispatcher.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,255 @@
open Import
module Payload = Control_file.Latest_payload

module type S = sig
module Fm : File_manager.S

type t

val v : root:string -> Fm.t -> (t, [> Fm.Errs.t ]) result
(** [v ~root fm] creates a dispatcher on top of the file manager [fm]. It
registers a callback with [Fm.register_mapping_consumer] that reloads the
mapping, then loads the mapping exposed by [Fm.mapping] (an empty mapping
is used when there is none). Errors met while loading are returned. *)

val read_exn : t -> off:int63 -> len:int -> bytes -> unit
(** [read_exn] either reads in the prefix or the suffix file, depending on
[off]: offsets at or after the start of the suffix are read from the
suffix, the others from the prefix. See [Io.read_exn] for the arguments.

Reads in the prefix raise [Errors.Pack_error] with
[`Invalid_read_of_gced_object] when no chunk of the mapping contains
[off] (the object was possibly gced), and with [`Invalid_prefix_read]
when the entry does not fit in its chunk or when there is no prefix
file. *)

val read_at_most_exn : t -> off:int63 -> len:int -> bytes -> int
(** [read_at_most_exn] is similar to [read_exn] but if the end of file is
reached while reading [len] bytes, then only the available bytes are read.
No [`Read_out_of_bounds] error is raised. The number of bytes read is
returned. In the prefix, "available" means up to the end of the chunk
containing [off]. *)

val end_offset : t -> int63
(** [end_offset] is the end offset of the suffix file translated into a
global offset, i.e. shifted by the global offset at which the suffix
starts (which is not 0 once the store has been gced).

NOTE(review): the original text stated that entries not yet flushed are
counted; presumably this refers to the suffix's append buffer — confirm
against [Suffix.end_offset]. *)

val read_exn_if_not_gced : t -> off:int63 -> len:int -> bytes -> bool
(** Similar to [read_exn] but returns [false] if the object was gced, instead
of raising an exception, and [true] when the read succeeded. Only the
[`Invalid_read_of_gced_object] error is turned into [false]; any other
exception is propagated. *)

val offset_of_suffix_off : t -> int63 -> int63
(** [offset_of_suffix_off t suffix_off] converts a suffix offset into a
(global) offset. *)

type mapping
(** A mapping from global offsets to chunks of the prefix file. *)

val load_mapping : Fm.Io.t -> (mapping, [> Fm.Errs.t ]) result
(** [load_mapping io] builds a mapping by iterating over the mapping file
[io]. Chunks are given consecutive prefix offsets, starting at 0, in
iteration order. *)

val poff_of_entry_exn : mapping -> off:int63 -> len:int -> int63
(** [poff_of_entry_exn mapping ~off ~len] is the offset in the prefix file of
the entry located at global offset [off].

Raises [Errors.Pack_error] with [`Invalid_read_of_gced_object] if no
chunk of [mapping] contains [off], and with [`Invalid_prefix_read] if
[len] bytes starting at [off] do not fit in that chunk. *)
end

(* Ordered maps keyed by [Int63] offsets. The ordering of keys is what makes
the [find_last_opt] lookup of [chunk_of_off_exn] possible. *)
module Intmap = Map.Make (Int63)

(* The following [with module Io = Io.Unix] forces unix *)
module Make (Fm : File_manager.S with module Io = Io.Unix) :
S with module Fm = Fm = struct
module Fm = Fm
module Io = Fm.Io
module Suffix = Fm.Suffix
module Mapping_file = Mapping_file.Make (Fm.Errs)
module Errs = Fm.Errs
module Control = Fm.Control

(* Counters of the reads served by [read_exn] from the suffix and from the
prefix respectively. They are only incremented: nothing in this module reads
them and they are not exposed through [S]. [read_at_most_exn] does not
update them. *)
let read_suffix = ref 0
let read_prefix = ref 0
(* TODO: move them in stats *)

type mapping_value = { poff : int63; len : int }
(** A chunk of the prefix file: [poff] is a prefix offset (i.e. an offset in
the prefix file), [len] is the length of the chunk starting at [poff]. *)

type mapping = mapping_value Intmap.t
(** Chunks of the prefix file, keyed by the global offset at which each chunk
starts. *)

type t = { fm : Fm.t; mutable mapping : mapping; root : string }
(** [mapping] is a map from global offset to (offset,len) pairs in the prefix
file; it is replaced by [reload]. [fm] is the file manager giving access
to the control, suffix, prefix and mapping files. [root] is stored as
given to [v]. *)

(* [load_mapping io] builds the mapping described by the mapping file [io].

   [Mapping_file.iter] calls [f ~off ~len] once per chunk, where [off] is the
   global offset of the chunk and [len] its length. Chunks are laid out
   back to back in the prefix file, so the prefix offset of a chunk is the
   sum of the lengths of the chunks seen before it ([poff] starts at 0).

   Returns the error of [Mapping_file.iter] if the iteration fails.

   No output is produced while loading: the per-chunk debug trace that used to
   be written to stderr has been removed. *)
let load_mapping io =
  let open Result_syntax in
  let open Int63.Syntax in
  let mapping = ref Intmap.empty in
  (* Prefix offset at which the next chunk starts. *)
  let poff = ref Int63.zero in
  let f ~off ~len =
    mapping := Intmap.add off { poff = !poff; len } !mapping;
    poff := !poff + Int63.of_int len
  in
  let* () = Mapping_file.iter io f in
  Ok !mapping

(* [reload t] replaces [t.mapping] with the mapping loaded from the mapping
   file exposed by the file manager, or with the empty mapping when the file
   manager has none. On error, [t.mapping] is left untouched and the error is
   returned. *)
let reload t =
  let loaded =
    match Fm.mapping t.fm with
    | Some io -> load_mapping io
    | None -> Ok Intmap.empty
  in
  match loaded with
  | Ok fresh ->
      t.mapping <- fresh;
      Ok ()
  | Error err -> Error err

(* [v ~root fm] creates a dispatcher with an empty mapping, registers
   [reload] as the [after_reload] callback of [fm]'s mapping consumers, and
   finally loads the mapping once. The registration happens before the first
   load, and stays in place even if that load fails. *)
let v ~root fm =
  let dispatcher = { fm; mapping = Intmap.empty; root } in
  let refresh () = reload dispatcher in
  Fm.register_mapping_consumer fm ~after_reload:refresh;
  match refresh () with
  | Ok () -> Ok dispatcher
  | Error err -> Error err

(* [entry_offset_suffix_start t] is the global offset at which the suffix
   file starts, as recorded in the status of the control file payload. It is
   0 for every status other than [From_v3_gced]. The placeholder statuses
   [T1] .. [T15] are not expected here and trigger an assertion failure. *)
let entry_offset_suffix_start t =
  let payload = Control.payload (Fm.control t.fm) in
  match payload.status with
  | Payload.From_v1_v2_post_upgrade _ -> Int63.zero
  | From_v3_used_non_minimal_indexing_strategy -> Int63.zero
  | From_v3_no_gc_yet -> Int63.zero
  | From_v3_gced { entry_offset_suffix_start = suffix_start; _ } ->
      suffix_start
  | T1 | T2 | T3 | T4 | T5 | T6 | T7 | T8 | T9 | T10 | T11 | T12 | T13 | T14
  | T15 ->
      assert false

(* The suffix only knows about real offsets (offsets in its own file); the
   dispatcher is where global offsets are translated into real ones (prefix
   or suffix offsets) and back.

   [end_offset t] is the end offset of the suffix expressed as a global
   offset. *)
let end_offset t =
  Int63.add (Suffix.end_offset (Fm.suffix t.fm)) (entry_offset_suffix_start t)

(* [suffix_off_of_offset t off] converts the global offset [off] into an
   offset in the suffix file, using
   [off] = [entry_offset_suffix_start] + [suffix_offset]. *)
let suffix_off_of_offset t off = Int63.sub off (entry_offset_suffix_start t)

(* [offset_of_suffix_off t suffix_off] converts an offset in the suffix file
   into a global offset, by adding the global offset at which the suffix
   starts. *)
let offset_of_suffix_off t suffix_off =
  Int63.add suffix_off (entry_offset_suffix_start t)

(* [chunk_of_off_exn mapping off_start] finds the chunk containing the global
   offset [off_start], i.e. the last chunk starting at or before [off_start].

   Returns [(chunk, shift_in_chunk, max_entry_len)] where [shift_in_chunk] is
   the position of [off_start] relative to the start of the chunk and
   [max_entry_len] is the number of bytes between [off_start] and the end of
   the chunk.

   Raises [Errors.Pack_error (`Invalid_read_of_gced_object _)] when:
   - case 1: no chunk starts at or before [off_start] (this includes an empty
     mapping);
   - case 2: the chunk found ends at or before [off_start].

   In both cases it is not possible to distinguish a read of a gced entry
   from an invalid read. Two [read_exn] functions are exposed so that this
   can be handled upstream. *)
let chunk_of_off_exn mapping off_start =
  let open Int63.Syntax in
  let possibly_gced msg =
    raise (Errors.Pack_error (`Invalid_read_of_gced_object msg))
  in
  let starts_at_or_before chunk_off = chunk_off <= off_start in
  match Intmap.find_last_opt starts_at_or_before mapping with
  | None ->
      (* Case 1: the entry is before the very first chunk, or there are no
         chunks at all. *)
      possibly_gced
        (Fmt.str "offset %a is before the first chunk, or the prefix is empty"
           Int63.pp off_start)
  | Some (chunk_off_start, chunk) ->
      assert (chunk_off_start <= off_start);
      let chunk_off_end = chunk_off_start + Int63.of_int chunk.len in
      (* Case 2: the entry starts after the end of the chunk. *)
      if chunk_off_end <= off_start then
        possibly_gced
          (Fmt.str
             "offset %a is supposed to be contained in chunk \
              (off=%a,poff=%a,len=%d) but starts after chunk"
             Int63.pp off_start Int63.pp chunk_off_start Int63.pp chunk.poff
             chunk.len);
      let shift_in_chunk = off_start - chunk_off_start in
      let max_entry_len = Int63.of_int chunk.len - shift_in_chunk in
      (chunk, shift_in_chunk, max_entry_len)

(* [poff_of_entry_exn mapping ~off ~len] is the offset in the prefix file of
   the entry of length [len] located at global offset [off].

   The chunk containing [off] is looked up first with [chunk_of_off_exn],
   which raises [Errors.Pack_error (`Invalid_read_of_gced_object _)] when no
   chunk contains [off]. Once the chunk is known, an entry that does not fit
   in it is always an invalid read and raises
   [Errors.Pack_error (`Invalid_prefix_read _)].

   The error message is now a single line; it used to contain a stray line
   break in the middle of the sentence. *)
let poff_of_entry_exn mapping ~off ~len =
  let chunk, shift_in_chunk, max_entry_len = chunk_of_off_exn mapping off in
  let open Int63.Syntax in
  (* Case 3: the entry ends after the chunk. *)
  if Int63.of_int len > max_entry_len then (
    let s =
      Fmt.str
        "entry (off=%a, len=%d) is supposed to be contained in chunk \
         (poff=%a,len=%d) and starting at %a but is larger than it can be \
         contained in chunk"
        Int63.pp off len Int63.pp chunk.poff chunk.len Int63.pp shift_in_chunk
    in
    raise (Errors.Pack_error (`Invalid_prefix_read s)));
  (* Case 4: success. *)
  chunk.poff + shift_in_chunk

(* [get_prefix fm] is the prefix file of [fm]. Raises
   [Errors.Pack_error (`Invalid_prefix_read _)] when [fm] has no prefix. *)
let get_prefix fm =
  match Fm.prefix fm with
  | None -> raise (Errors.Pack_error (`Invalid_prefix_read "no prefix found"))
  | Some prefix_io -> prefix_io

(* [read_exn t ~off ~len buf] reads [len] bytes at global offset [off] into
   [buf].

   Offsets at or after the start of the suffix are translated into suffix
   offsets and read from the suffix file. Other offsets are translated
   through the mapping and read from the prefix file.

   Raises whatever [Suffix.read_exn] / [Io.read_exn] raise, and
   [Errors.Pack_error] with [`Invalid_read_of_gced_object] or
   [`Invalid_prefix_read] when the prefix lookup fails (see
   [poff_of_entry_exn] and [get_prefix]).

   Exceptions from the suffix read are no longer intercepted: the previous
   catch-all handler only dumped debugging numbers on stderr before
   re-raising, and re-raising with [raise e] could lose the original
   backtrace. *)
let read_exn t ~off ~len buf =
  let open Int63.Syntax in
  if off >= entry_offset_suffix_start t then (
    incr read_suffix;
    let suffix_off = suffix_off_of_offset t off in
    Suffix.read_exn (Fm.suffix t.fm) ~off:suffix_off ~len buf)
  else (
    incr read_prefix;
    (* The mapping lookup comes first so that a read of a gced object is
       reported as such even when there is no prefix file. *)
    let poff = poff_of_entry_exn t.mapping ~off ~len in
    let prefix = get_prefix t.fm in
    Io.read_exn prefix ~off:poff ~len buf)

(* [read_exn_if_not_gced t ~off ~len buf] behaves like [read_exn] but is
   [false] instead of raising when the read fails with
   [`Invalid_read_of_gced_object], and [true] when the read succeeds. Any
   other exception is propagated. *)
let read_exn_if_not_gced t ~off ~len buf =
  match read_exn t ~off ~len buf with
  | () -> true
  | exception Errors.Pack_error (`Invalid_read_of_gced_object _) -> false

(* [read_at_most_from_suffix_exn t ~off ~len buf] reads from the suffix at
   global offset [off], shrinking [len] to the number of bytes between [off]
   and [end_offset t] when fewer than [len] bytes remain. Returns the number
   of bytes requested from the suffix. *)
let read_at_most_from_suffix_exn t ~off ~len buf =
  let available = Int63.sub (end_offset t) off in
  let len =
    if Int63.Syntax.(available < Int63.of_int len) then Int63.to_int available
    else len
  in
  let suffix_off = suffix_off_of_offset t off in
  Suffix.read_exn (Fm.suffix t.fm) ~off:suffix_off ~len buf;
  len

(* [read_at_most_from_prefix_exn t ~off ~len buf] reads from the prefix at
   global offset [off], shrinking [len] to the number of bytes left in the
   chunk containing [off] when the chunk is too short. Returns the number of
   bytes requested from the prefix.

   The chunk lookup may raise [`Invalid_read_of_gced_object], and
   [get_prefix] may raise [`Invalid_prefix_read]. *)
let read_at_most_from_prefix_exn t ~off ~len buf =
  let chunk, shift_in_chunk, max_entry_len = chunk_of_off_exn t.mapping off in
  let open Int63.Syntax in
  let requested = Int63.of_int len in
  let len =
    Int63.to_int
      (if max_entry_len < requested then max_entry_len else requested)
  in
  let poff = chunk.poff + shift_in_chunk in
  let prefix = get_prefix t.fm in
  Io.read_exn prefix ~off:poff ~len buf;
  len

(* [read_at_most_exn t ~off ~len buf] reads at most [len] bytes at global
   offset [off]: from the prefix when [off] is before the start of the
   suffix, from the suffix otherwise. Returns the number of bytes read. *)
let read_at_most_exn t ~off ~len buf =
  let suffix_start = entry_offset_suffix_start t in
  if Int63.Syntax.(off < suffix_start) then
    read_at_most_from_prefix_exn t ~off ~len buf
  else read_at_most_from_suffix_exn t ~off ~len buf
end
32 changes: 18 additions & 14 deletions src/irmin-pack/unix/errors.ml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ type base_error =
| `Index_failure of string
| `Invalid_layout
| `Corrupted_legacy_file
| `Corrupted_mapping_file of string
| `Pending_flush
| `Rw_not_allowed
| `Migration_needed
Expand All @@ -46,14 +47,17 @@ type base_error =
| `Gc_forbidden_during_batch
| `Unknown_major_pack_version of string
| `Only_minimal_indexing_strategy_allowed
| `Commit_key_is_indexed_and_dangling of string
| `Commit_key_is_dangling of string
| `Dangling_key of string
| `Gc_disallowed
| `Node_or_contents_key_is_indexed of string
| `Commit_parent_key_is_indexed of string
| `Gc_process_error of string
| `Corrupted_gc_result_file of string
| `Gc_process_died_without_result_file of string ]
| `Gc_process_died_without_result_file of string
| `Gc_forbidden_on_32bit_platforms
| `Invalid_prefix_read of string
| `Invalid_read_of_gced_object of string ]
[@@deriving irmin ~pp]
(** [base_error] is the type of most errors that can occur in a [result], except
for errors that have associated exceptions (see below) and backend-specific
Expand All @@ -69,16 +73,16 @@ exception RO_not_allowed = Irmin_pack.RO_not_allowed

(** Error manager *)
module type S = sig
type t

val pp : Format.formatter -> t -> unit
val raise_error : t -> 'a
val log_error : string -> t -> unit
val catch : (unit -> 'a) -> ('a, t) result
val raise_if_error : ('a, t) result -> 'a
val log_if_error : string -> (unit, t) result -> unit
val to_json_string : (int63, t) result -> string
val of_json_string : string -> (int63, t) result
type t = error

val pp : Format.formatter -> [< t ] -> unit
val raise_error : [< t ] -> 'a
val log_error : string -> [< t ] -> unit
val catch : (unit -> 'a) -> ('a, [> t ]) result
val raise_if_error : ('a, [< t ]) result -> 'a
val log_if_error : string -> (unit, [< t ]) result -> unit
val to_json_string : (int63, [< t ]) result -> string
val of_json_string : string -> (int63, [> t ]) result
end

module Base : S with type t = error = struct
Expand All @@ -98,7 +102,7 @@ module Base : S with type t = error = struct

let catch f =
try Ok (f ()) with
| Pack_error e -> Error (e : base_error :> t)
| Pack_error e -> Error (e : base_error :> [> t ])
| RO_not_allowed -> Error `Ro_not_allowed
| Closed -> Error `Closed

Expand All @@ -119,7 +123,7 @@ module Base : S with type t = error = struct
let err_to_t = function
| Closed -> `Closed
| Ro_not_allowed -> `Ro_not_allowed
| Pack_error e -> (e : base_error :> t)
| Pack_error e -> (e : base_error :> [> t ])

let err_result = Irmin.Type.(result int63 err_t)

Expand Down
3 changes: 2 additions & 1 deletion src/irmin-pack/unix/errors_base.ml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ type base_error =
| `Commit_parent_key_is_indexed of string
| `Gc_process_error of string
| `Corrupted_gc_result_file of string
| `Gc_process_died_without_result_file of string ]
| `Gc_process_died_without_result_file of string
| `Gc_forbidden_on_32bit_platforms ]
[@@deriving irmin ~pp]
(** [base_error] is the type of most errors that can occur in a [result], except
[`Io_misc] which depends on the Io module used, and except [`Ro_not_allowed]
Expand Down
Loading