From 2d091a6084161c05e0ba10a386b8189da4a13130 Mon Sep 17 00:00:00 2001 From: gwenaelle Date: Fri, 17 Mar 2023 18:18:59 +0100 Subject: [PATCH 1/2] irmin-pack-tool: Add a tool for last indexed accessible commits --- src/irmin-pack-tools/README.md | 35 +++++- src/irmin-pack-tools/last_accessible/dune | 8 ++ .../last_accessible/last_accessible.ml | 102 ++++++++++++++++++ 3 files changed, 144 insertions(+), 1 deletion(-) create mode 100644 src/irmin-pack-tools/last_accessible/dune create mode 100644 src/irmin-pack-tools/last_accessible/last_accessible.ml diff --git a/src/irmin-pack-tools/README.md b/src/irmin-pack-tools/README.md index 30e038b148..e666128663 100644 --- a/src/irmin-pack-tools/README.md +++ b/src/irmin-pack-tools/README.md @@ -4,6 +4,7 @@ This folder contains several tools meant to provide usefull ways to debug and du Currently, there are the following tools: - [`ppcf`](#ppcf), a json printer for control files - [`ppidx`](#ppidx), a json printer for index folders +- [`last_accessible`](#last_accessible), gives information about the last accessible commit registered in the index - [`tezos-explorer`](#tezos-explorer), a gui for a fast exploration of tezos stores ## ppcf @@ -53,7 +54,7 @@ It is fairly straightforward: ```shell $ dune exec -- irmin-ppidx ``` -The second one is the path to the root of the store, not the index folder (e.g. `output/root/` where `output/root/index/` exists) +The first argument is the path to the root of the store, not the index folder (e.g. `output/root/` where `output/root/index/` exists) It will typically give the following json output, with one line per entry registered: ``` @@ -95,5 +96,37 @@ However, it might be useful to sort the json output using the offset. You can do $ jq -s 'sort_by(.off)' -- index ``` +## last_accessible +This tool searches the index for the commit with the biggest offset still within the size of the suffix files. 
+It is useful when a store has become inconsistent and you want to know which commit you can safely revert to, instead of reinstalling the whole store. +It is fairly straightforward: +```shell +$ dune exec -- irmin-last_accessible path/to/store +``` +The first argument is the path to the root of the store, not the index folder (e.g. `output/root/` where `output/root/index/` exists). +Useful debugging information can also be printed by setting the right verbosity using the `--verbose` or `--verbosity=...` flags. + +It will typically give the following json output: +``` +{"hash":"oJgRv9/bIHkNAcbG8TaElD7P/UwXMWL0lrTXbabCqdo=","off":97297154,"len":103,"kind":"Commit_v2"} +``` +The output can be made more readable with the `jq` tool: +```shell +$ dune exec -- irmin-last_accessible path/to/store | jq +``` +This gives: +```json +{ + "hash": "oJgRv9/bIHkNAcbG8TaElD7P/UwXMWL0lrTXbabCqdo=", + "off": 97297154, + "len": 103, + "kind": "Commit_v2" +} +``` + +Note that this tool makes several assumptions about the state of the store: +- The store is an upper store. 
+- The control file holds the right informations in the case of a gced store + ## tezos-explorer TODO diff --git a/src/irmin-pack-tools/last_accessible/dune b/src/irmin-pack-tools/last_accessible/dune new file mode 100644 index 0000000000..94cffa05b2 --- /dev/null +++ b/src/irmin-pack-tools/last_accessible/dune @@ -0,0 +1,8 @@ +(executable + (public_name irmin-last_accessible) + (package irmin-pack-tools) + (name last_accessible) + (modules last_accessible) + (libraries irmin-pack irmin-pack.unix cmdliner logs) + (preprocess + (pps ppx_irmin.internal))) diff --git a/src/irmin-pack-tools/last_accessible/last_accessible.ml b/src/irmin-pack-tools/last_accessible/last_accessible.ml new file mode 100644 index 0000000000..001b6bbe9f --- /dev/null +++ b/src/irmin-pack-tools/last_accessible/last_accessible.ml @@ -0,0 +1,102 @@ +module Hash = Irmin.Hash.SHA256 +module Index = Irmin_pack_unix.Index.Make (Hash) +module Int63 = Optint.Int63 +module Io = Irmin_pack_unix.Io.Unix +module Io_errors = Irmin_pack_unix.Io_errors.Make (Io) +module Upper_control = Irmin_pack_unix.Control_file.Upper (Io) + +let src = + Logs.Src.create "irmin-pack-tools" ~doc:"irmin-pack-tools last accessible" + +module Log = (val Logs.src_log src : Logs.LOG) + +type t = { hash : string; off : Int63.t; len : int; kind : string } +[@@deriving irmin] + +let get_payload root = + let control_file = Irmin_pack.Layout.V5.control ~root in + let r = Upper_control.read_payload ~path:control_file in + match r with Error err -> Io_errors.raise_error err | Ok payload -> payload + +let get_suffixes_sizes root + (payload : Irmin_pack_unix.Control_file.Payload.Upper.V5.t) = + let r = ref 0 in + for i = payload.chunk_start_idx to payload.chunk_num do + let suffix = Irmin_pack.Layout.V5.suffix_chunk ~chunk_idx:i ~root in + let stats = Unix.stat suffix in + [%log.debug "found chunk at '%s' with size %d" suffix stats.st_size]; + r := !r + stats.st_size + done; + [%log.debug "sum of found suffixes sizes: %d" !r]; + !r + 
+let get_status_infos (payload : Irmin_pack_unix.Control_file.Payload.Upper.V5.t) + = + match payload.status with + | Gced gced -> + [%log.debug + "store status: gced, found suffix start offset at %a and %a dead bytes" + Int63.pp gced.suffix_start_offset Int63.pp gced.suffix_dead_bytes]; + ( Int63.to_int gced.suffix_start_offset, + Int63.to_int gced.suffix_dead_bytes ) + | _ -> (0, 0) + +let get_last_accessible root off_max = + let r = ref { hash = "n/a"; off = Int63.zero; len = 0; kind = "n/a" } in + let off_max = Int63.of_int off_max in + let f k v = + let hash = Base64.encode_exn (Index.Key.encode k) in + let off, len, kind = v in + match (kind : Irmin_pack.Pack_value.Kind.t) with + | (Commit_v1 | Commit_v2) when off > off_max -> + [%log.warn + "found commit with offset %a in index (higher than maximum \ + accessible offset)" + Int63.pp off] + | (Commit_v1 | Commit_v2) when Int63.add off (Int63.of_int len) > off_max -> + [%log.warn + "found commit with offset %a and length %d in index (too long to fit \ + in the maximum accessible offset)" + Int63.pp off len] + | (Commit_v1 | Commit_v2) when off >= !r.off -> + let kind = Fmt.str "%a" Irmin_pack.Pack_value.Kind.pp kind in + r := { hash; off; len; kind } + | _ -> () + in + let v = Index.v_exn ~readonly:true ~log_size:500_000 root in + Index.iter f v; + !r + +let main root_folder () = + let payload = get_payload root_folder in + let sizes = get_suffixes_sizes root_folder payload in + let start_offset, dead_bytes = get_status_infos payload in + let off_max = sizes + start_offset - dead_bytes in + [%log.debug "last accessible offset: %d" off_max]; + let last_accessible = get_last_accessible root_folder off_max in + Fmt.pr "%a@." 
(Irmin.Type.pp_json t) last_accessible + +(** Cmdliner **) + +open Cmdliner + +let setup_log level = + Logs.set_level level; + Logs.set_reporter (Logs_fmt.reporter ()) + +let root_folder = + Arg.( + required + & pos 0 (some string) None + & info [] ~docv:"root folder" ~doc:"the path to the store") + +let main_cmd = + let doc = + "gives the last accessible commit informations stored in the index of an \ + upper store" + in + let info = Cmd.info "irmin-last_accessible" ~doc in + Cmd.v info + Term.(const main $ root_folder $ (const setup_log $ Logs_cli.level ())) + +let () = exit (Cmd.eval ~catch:false main_cmd) From 98a8e098191dd40af1c23073b1e7c474417306cf Mon Sep 17 00:00:00 2001 From: gwenaelle Date: Mon, 26 Jun 2023 15:47:05 +0200 Subject: [PATCH 2/2] tmp --- .../last_accessible/last_accessible.ml | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/src/irmin-pack-tools/last_accessible/last_accessible.ml b/src/irmin-pack-tools/last_accessible/last_accessible.ml index 001b6bbe9f..02bc4f20a8 100644 --- a/src/irmin-pack-tools/last_accessible/last_accessible.ml +++ b/src/irmin-pack-tools/last_accessible/last_accessible.ml @@ -21,14 +21,16 @@ let get_payload root = let get_suffixes_sizes root (payload : Irmin_pack_unix.Control_file.Payload.Upper.V5.t) = let r = ref 0 in + let last = ref 0 in for i = payload.chunk_start_idx to payload.chunk_num do let suffix = Irmin_pack.Layout.V5.suffix_chunk ~chunk_idx:i ~root in let stats = Unix.stat suffix in [%log.debug "found chunk at '%s' with size %d" suffix stats.st_size]; - r := !r + stats.st_size + r := !r + stats.st_size; + last := stats.st_size done; [%log.debug "sum of found suffixes sizes: %d" !r]; - !r + !r, !last let get_status_infos (payload : Irmin_pack_unix.Control_file.Payload.Upper.V5.t) = @@ -67,14 +69,27 @@ let get_last_accessible root off_max = Index.iter f v; !r +let forge_new_payload (payload: Irmin_pack_unix.Control_file.Payload.Upper.V5.t) last_size off_max last_accessible = 
+ let open Int63.Infix in + let appendable_chunk_poff = + Int63.(of_int last_size - last_accessible.off - of_int last_accessible.len + of_int off_max) + in + (* Check if negativ ? *) + Fmt.epr "new: %a@." Int63.pp appendable_chunk_poff; + { payload with appendable_chunk_poff} + let main root_folder () = let payload = get_payload root_folder in - let sizes = get_suffixes_sizes root_folder payload in + let sizes, last_size = get_suffixes_sizes root_folder payload in let start_offset, dead_bytes = get_status_infos payload in let off_max = sizes + start_offset - dead_bytes in [%log.debug "last accessible offset: %d" off_max]; let last_accessible = get_last_accessible root_folder off_max in - Fmt.pr "%a@." (Irmin.Type.pp_json t) last_accessible + Fmt.pr "%a@." (Irmin.Type.pp_json t) last_accessible; + let pl = forge_new_payload payload last_size off_max last_accessible in + Fmt.pr "%a@." (Irmin.Type.pp_json Irmin_pack_unix.Control_file.Payload.Upper.V5.t) pl; + let _ = Result.get_ok (Upper_control.create_rw ~path:"foo.bar" ~overwrite:true pl) in + () (** Cmdliner **)