Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

irmin-pack: clarify uses of Bytes.unsafe_{to,of}_string #1970

Merged
merged 8 commits into from
Jul 8, 2022
4 changes: 4 additions & 0 deletions src/irmin-pack/inode.ml
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,12 @@ struct
let key =
match Conf.inode_child_order with
| `Hash_bits ->
(* Bytes.unsafe_of_string usage: possibly safe TODO justify safety, or switch to
use the safe Bytes.of_string *)
fun s -> Bytes.unsafe_of_string (hash_to_bin_string (Step.hash s))
| `Seeded_hash | `Custom _ ->
(* Bytes.unsafe_of_string usage: possibly safe TODO justify safety, or switch to
use the safe Bytes.of_string *)
metanivek marked this conversation as resolved.
Show resolved Hide resolved
fun s -> Bytes.unsafe_of_string (step_to_bin_string s)

(* Assume [k = cryto_hash(step)] (see {!key}) and [Conf.entry] can
Expand Down
5 changes: 5 additions & 0 deletions src/irmin-pack/unix/atomic_write.ml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ module Make_persistent (K : Irmin.Type.S) (V : Value.S) = struct
assert (n = 4);
(file_pos := Int63.Syntax.(!file_pos + Int63.of_int 4));
let pos_ref = ref 0 in
(* Bytes.unsafe_to_string usage: We assume Io_legacy.read_block returns unique
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add documentation to Io_legacy.read

ownership of buf back to this function (this assumption holds currently; subsequent
modifications of that code need to ensure this remains the case); then in call to
Bytes.unsafe_to_string we give up ownership of buf (we do not modify the buffer
afterwards) and get ownership of resulting string; so this use is safe. *)
let v = decode_bin (Bytes.unsafe_to_string buf) pos_ref in
assert (!pos_ref = 4);
Int32.to_int v
Expand Down
8 changes: 6 additions & 2 deletions src/irmin-pack/unix/gc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,8 @@ module Make (Args : Args) : S with module Args := Args = struct
let len' = Int63.to_int len in
read_exn ~off ~len:len' buffer;
let () =
if len = buffer_size then append_exn (Bytes.unsafe_to_string buffer)
else append_exn (String.sub (Bytes.unsafe_to_string buffer) 0 len')
if len = buffer_size then append_exn (Bytes.to_string buffer)
else append_exn (String.sub (Bytes.to_string buffer) 0 len')
in
let len_remaining = len_remaining - len in
if len_remaining > Int63.zero then aux (off + len) len_remaining
Expand Down Expand Up @@ -179,6 +179,10 @@ module Make (Args : Args) : S with module Args := Args = struct
read_exn ~off ~len buffer;
let poff = Dispatcher.poff_of_entry_exn ~off ~len mapping in
Bytes.set buffer Hash.hash_size magic_parent;
(* Bytes.unsafe_to_string usage: We assume read_exn returns unique ownership of buffer
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add documentation to read_exn

to this function. Then at the call to Bytes.unsafe_to_string we give up unique
ownership to buffer (we do not modify it thereafter) in return for ownership of the
resulting string, which we pass to write_exn. This usage is safe. *)
write_exn ~off:poff ~len (Bytes.unsafe_to_string buffer)

let create_new_suffix ~root ~generation =
Expand Down
9 changes: 7 additions & 2 deletions src/irmin-pack/unix/io.ml
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,9 @@ module Unix = struct
| true, _ -> raise Errors.Closed
| _, true -> raise Errors.RO_not_allowed
| _ ->
(* really_write and following do not mutate the given buffer, so
Bytes.unsafe_of_string is actually safe *)
(* Bytes.unsafe_of_string usage: s has shared ownership; we assume that
Util.really_write does not mutate buf (i.e., only needs shared ownership). This
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Document Util.really_write

usage is safe. *)
let buf = Bytes.unsafe_of_string s in
let () = Util.really_write t.fd off buf 0 len in
Index.Stats.add_write len;
Expand Down Expand Up @@ -199,6 +200,10 @@ module Unix = struct
let buf = Bytes.create len in
try
read_exn t ~off ~len buf;
(* Bytes.unsafe_to_string usage: buf is local to this function, so uniquely
owned. We assume read_exn returns unique ownership of buf to this function. Then
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Document read_exn

at the call to Bytes.unsafe_to_string we give up unique ownership of buf for
ownership of the string. This is safe. *)
Ok (Bytes.unsafe_to_string buf)
with
| Errors.Pack_error ((`Invalid_argument | `Read_out_of_bounds) as e) ->
Expand Down
7 changes: 6 additions & 1 deletion src/irmin-pack/unix/mapping_file.ml
Original file line number Diff line number Diff line change
Expand Up @@ -338,14 +338,17 @@ module Make (Errs : Io_errors.S with module Io = Io.Unix) = struct
file0_ref := Some file0;

(* Fill and close [file0] *)
let buffer = Bytes.create 16 in
let register_entry ~off ~len =
(* Write [off, len] in native-endian encoding because it will be read
with mmap. *)
(* if Int63.to_int off < 500 then
* Fmt.epr "register_entry < 500: %d %d\n%!" (Int63.to_int off) len; *)
let buffer = Bytes.create 16 in
Bytes.set_int64_ne buffer 0 (Int63.to_int64 off);
Bytes.set_int64_ne buffer 8 (Int64.of_int len);
(* Bytes.unsafe_to_string usage: buffer is uniquely owned; we assume
Bytes.set_int64_ne returns unique ownership; we give up ownership of buffer in
conversion to string. This is safe. *)
Ao.append_exn file0 (Bytes.unsafe_to_string buffer)
in
let* () = Errs.catch (fun () -> register_entries ~register_entry) in
Expand Down Expand Up @@ -436,6 +439,8 @@ module Make (Errs : Io_errors.S with module Io = Io.Unix) = struct
else
let off, len =
(* Decoding a pair of int can't fail *)
(* Bytes.unsafe_to_string usage: possibly safe TODO justify safety, or convert
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is safe because the string is immediately decoded with decode_bin_pair and then never used again. Another way to say it: the buffer cannot be modified while the string is being used for decoding.

to Bytes.to_string *)
decode_bin_pair (Bytes.unsafe_to_string buffer) buffer_off
in
let () =
Expand Down
9 changes: 8 additions & 1 deletion src/irmin-pack/unix/pack_index.ml
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,16 @@ module Make (K : Irmin.Hash.S) = struct
Bytes.set_int64_be buf 0 (Int63.to_int64 off);
Bytes.set_int32_be buf 8 (Int32.of_int len);
Bytes.set buf 12 (Pack_value.Kind.to_magic kind);
Bytes.unsafe_to_string buf
(* Bytes.unsafe_to_string usage: buf is local, uniquely owned. We assume the various
functions above return unique ownership of buf. Then in the call to
Bytes.unsafe_to_string we give up unique ownership of buf for ownership of the
resulting string. This is safe. *)
Bytes.unsafe_to_string buf (* safe: see comment above *)
metanivek marked this conversation as resolved.
Show resolved Hide resolved

let decode s pos : t =
(* Bytes.unsafe_of_string usage: s is shared ownership; buf is shared ownership (we
cannot mutate buf); we assume the Bytes.get... functions require only shared
ownership. This usage is safe. *)
metanivek marked this conversation as resolved.
Show resolved Hide resolved
let buf = Bytes.unsafe_of_string s in
let off = Bytes.get_int64_be buf pos |> Int63.of_int64 in
let len = Bytes.get_int32_be buf (pos + 8) |> Int32.to_int in
Expand Down
18 changes: 17 additions & 1 deletion src/irmin-pack/unix/pack_store.ml
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,14 @@ struct
"Attempted to read an entry at offset %a in the pack file, but got \
only %d bytes"
Int63.pp off bytes_read;
let hash = decode_bin_hash (Bytes.unsafe_to_string buf) (ref 0) in
let hash =
(* Bytes.unsafe_to_string usage: buf is created locally, so we have unique
ownership; we assume Dispatcher.read_at_most_exn returns unique ownership; use of
Bytes.unsafe_to_string converts buffer to shared ownership; the rest of the code
seems to require only shared ownership (buffer is read, but not mutated). This is
safe. *)
decode_bin_hash (Bytes.unsafe_to_string buf) (ref 0)
in
let kind = Pack_value.Kind.of_magic_exn (Bytes.get buf Hash.hash_size) in
let size_of_value_and_length_header =
match Val.length_header kind with
Expand All @@ -172,6 +179,8 @@ struct
variable-length length field (if they exist / were read
correctly): *)
let pos_ref = ref length_header_start in
(* Bytes.unsafe_to_string usage: buf is shared at this point; we assume
Varint.decode_bin requires only shared ownership. This usage is safe. *)
let length_header =
Varint.decode_bin (Bytes.unsafe_to_string buf) pos_ref
in
Expand All @@ -195,6 +204,10 @@ struct
let found = Dispatcher.read_if_not_gced t.dispatcher ~off ~len buf in
if (not found) || gced buf then None
else
(* Bytes.unsafe_to_string usafe: buf is create in this function, uniquely owned; we
assume Dispatcher.read_if_not_gced returns unique ownership; then call to
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Document Dispatcher.read_if_not_gced

Bytes.unsafe_to_string gives up ownerhsip of buf for ownership of resulting
string. This is safe. *)
let hash = decode_bin_hash (Bytes.unsafe_to_string buf) (ref 0) in
Some hash

Expand Down Expand Up @@ -295,6 +308,9 @@ struct
let key_of_hash = Pack_key.v_indexed in
let dict = Dict.find t.dict in
let v =
(* Bytes.unsafe_to_string usage: buf created, uniquely owned; after creation, we
assume Dispatcher.read_if_not_gced returns unique ownership; we give up unique
ownership in call to Bytes.unsafe_to_string. This is safe. *)
Val.decode_bin ~key_of_offset ~key_of_hash ~dict
(Bytes.unsafe_to_string buf)
(ref 0)
Expand Down
11 changes: 10 additions & 1 deletion src/irmin-pack/unix/snapshot.ml
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,11 @@ module Make (Args : Args) = struct
io_read_and_decode_entry_prefix ~off:offset t
in
let entry_of_hash hash = key_of_hash hash t.inode_pack in
(* Bytes.unsafe_to_string usage: buf is created locally, uniquely owned; we assume
Dispatcher.read_exn returns unique ownership; then call to Bytes.unsafe_to_string
metanivek marked this conversation as resolved.
Show resolved Hide resolved
gives up unique ownership of buf. This is safe. *)
Inode.Raw.decode_children_offsets ~entry_of_offset ~entry_of_hash
(Bytes.unsafe_to_string buf)
(Bytes.unsafe_to_string buf) (* safe: see comment above *)
(ref 0)

type visit = { visited : Hash.t -> bool; set_visit : Hash.t -> unit }
Expand Down Expand Up @@ -226,9 +229,15 @@ module Make (Args : Args) = struct
let buf = Bytes.create encoded_size in
Bytes.set_int64_be buf 0 (Int63.to_int64 off);
Bytes.set_int32_be buf 8 (Int32.of_int len);
(* Bytes.unsafe_to_string usage: buf is local, uniquely owned; we assume the
Bytes.set... functions return unique ownership; then Bytes.unsafe_to_string
gives up unique ownership of buf. This is safe. *)
Bytes.unsafe_to_string buf

let decode s pos : t =
(* Bytes.unsafe_of_string usage: s is shared; buf is shared (we cannot mutate it);
we assume Bytes.get_... functions need shared ownership only. This usage is
safe. *)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar to another comment. The lifecycle of the buffer seems important to the safety here.

let buf = Bytes.unsafe_of_string s in
let off = Bytes.get_int64_be buf pos |> Int63.of_int64 in
let len = Bytes.get_int32_be buf (pos + 8) |> Int32.to_int in
Expand Down
3 changes: 3 additions & 0 deletions src/irmin-pack/unix/traverse_pack_file.ml
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,9 @@ end = struct
else
let buffer_off, off, missing_hash =
match
(* Bytes.unsafe_to_string usage: possibly safe, depending on details of
implementation of decode_entry_exn TODO either justify clearly that this is
safe, or change to use safe Bytes.to_string *)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

decode_entry_exn is likely safe but it would take some work to verify all the decode functions are. (I think it is highly unlikely that they modify the string)

decode_entry_exn ~off
~buffer:(Bytes.unsafe_to_string !buffer)
~buffer_off
Expand Down