diff --git a/src/irmin-pack/inode.ml b/src/irmin-pack/inode.ml index 618b572953..f2232effeb 100644 --- a/src/irmin-pack/inode.ml +++ b/src/irmin-pack/inode.ml @@ -104,8 +104,12 @@ struct let key = match Conf.inode_child_order with | `Hash_bits -> + (* Bytes.unsafe_of_string usage: possibly safe TODO justify safety, or switch to + use the safe Bytes.of_string *) fun s -> Bytes.unsafe_of_string (hash_to_bin_string (Step.hash s)) | `Seeded_hash | `Custom _ -> + (* Bytes.unsafe_of_string usage: possibly safe TODO justify safety, or switch to + use the safe Bytes.of_string *) fun s -> Bytes.unsafe_of_string (step_to_bin_string s) (* Assume [k = cryto_hash(step)] (see {!key}) and [Conf.entry] can diff --git a/src/irmin-pack/unix/atomic_write.ml b/src/irmin-pack/unix/atomic_write.ml index f9d6444947..ec71b30822 100644 --- a/src/irmin-pack/unix/atomic_write.ml +++ b/src/irmin-pack/unix/atomic_write.ml @@ -33,6 +33,11 @@ module Make_persistent (K : Irmin.Type.S) (V : Value.S) = struct assert (n = 4); (file_pos := Int63.Syntax.(!file_pos + Int63.of_int 4)); let pos_ref = ref 0 in + (* Bytes.unsafe_to_string usage: We assume Io_legacy.read_block returns unique + ownership of buf back to this function (this assumption holds currently; subsequent + modifications of that code need to ensure this remains the case); then in call to + Bytes.unsafe_to_string we give up ownership of buf (we do not modify the buffer + afterwards) and get ownership of resulting string; so this use is safe. *) let v = decode_bin (Bytes.unsafe_to_string buf) pos_ref in assert (!pos_ref = 4); Int32.to_int v diff --git a/src/irmin-pack/unix/gc.ml b/src/irmin-pack/unix/gc.ml index 6de26f245c..7089878cc3 100644 --- a/src/irmin-pack/unix/gc.ml +++ b/src/irmin-pack/unix/gc.ml @@ -117,8 +117,8 @@ module Make (Args : Args) : S with module Args := Args = struct let len' = Int63.to_int len in read_exn ~off ~len:len' buffer; let () = - if len = buffer_size then append_exn (Bytes.unsafe_to_string buffer) - else append_exn (String.sub (Bytes.unsafe_to_string buffer) 0 len') + if len = buffer_size then append_exn (Bytes.to_string buffer) + else append_exn (String.sub (Bytes.to_string buffer) 0 len') in let len_remaining = len_remaining - len in if len_remaining > Int63.zero then aux (off + len) len_remaining @@ -179,6 +179,10 @@ module Make (Args : Args) : S with module Args := Args = struct read_exn ~off ~len buffer; let poff = Dispatcher.poff_of_entry_exn ~off ~len mapping in Bytes.set buffer Hash.hash_size magic_parent; + (* Bytes.unsafe_to_string usage: We assume read_exn returns unique ownership of buffer + to this function. Then at the call to Bytes.unsafe_to_string we give up unique + ownership to buffer (we do not modify it thereafter) in return for ownership of the + resulting string, which we pass to write_exn. This usage is safe. *) write_exn ~off:poff ~len (Bytes.unsafe_to_string buffer) let create_new_suffix ~root ~generation = diff --git a/src/irmin-pack/unix/io.ml b/src/irmin-pack/unix/io.ml index 723694c344..27093d55e7 100644 --- a/src/irmin-pack/unix/io.ml +++ b/src/irmin-pack/unix/io.ml @@ -158,8 +158,9 @@ module Unix = struct | true, _ -> raise Errors.Closed | _, true -> raise Errors.RO_not_allowed | _ -> - (* really_write and following do not mutate the given buffer, so - Bytes.unsafe_of_string is actually safe *) + (* Bytes.unsafe_of_string usage: s has shared ownership; we assume that + Util.really_write does not mutate buf (i.e., only needs shared ownership). This + usage is safe. *) let buf = Bytes.unsafe_of_string s in let () = Util.really_write t.fd off buf 0 len in Index.Stats.add_write len; @@ -199,6 +200,10 @@ module Unix = struct let buf = Bytes.create len in try read_exn t ~off ~len buf; + (* Bytes.unsafe_to_string usage: buf is local to this function, so uniquely + owned. We assume read_exn returns unique ownership of buf to this function. Then + at the call to Bytes.unsafe_to_string we give up unique ownership of buf for + ownership of the string. This is safe. *) Ok (Bytes.unsafe_to_string buf) with | Errors.Pack_error ((`Invalid_argument | `Read_out_of_bounds) as e) -> diff --git a/src/irmin-pack/unix/mapping_file.ml b/src/irmin-pack/unix/mapping_file.ml index ad13cae871..7dd51e13b3 100644 --- a/src/irmin-pack/unix/mapping_file.ml +++ b/src/irmin-pack/unix/mapping_file.ml @@ -338,14 +338,17 @@ module Make (Errs : Io_errors.S with module Io = Io.Unix) = struct file0_ref := Some file0; (* Fill and close [file0] *) - let buffer = Bytes.create 16 in let register_entry ~off ~len = (* Write [off, len] in native-endian encoding because it will be read with mmap. *) (* if Int63.to_int off < 500 then * Fmt.epr "register_entry < 500: %d %d\n%!" (Int63.to_int off) len; *) + let buffer = Bytes.create 16 in Bytes.set_int64_ne buffer 0 (Int63.to_int64 off); Bytes.set_int64_ne buffer 8 (Int64.of_int len); + (* Bytes.unsafe_to_string usage: buffer is uniquely owned; we assume + Bytes.set_int64_ne returns unique ownership; we give up ownership of buffer in + conversion to string. This is safe. *) Ao.append_exn file0 (Bytes.unsafe_to_string buffer) in let* () = Errs.catch (fun () -> register_entries ~register_entry) in @@ -436,6 +439,8 @@ module Make (Errs : Io_errors.S with module Io = Io.Unix) = struct else let off, len = (* Decoding a pair of int can't fail *) + (* Bytes.unsafe_to_string usage: possibly safe TODO justify safety, or convert + to Bytes.to_string *) decode_bin_pair (Bytes.unsafe_to_string buffer) buffer_off in let () = diff --git a/src/irmin-pack/unix/pack_index.ml b/src/irmin-pack/unix/pack_index.ml index 9eb01ee0bb..bf113efafe 100644 --- a/src/irmin-pack/unix/pack_index.ml +++ b/src/irmin-pack/unix/pack_index.ml @@ -39,9 +39,17 @@ module Make (K : Irmin.Hash.S) = struct Bytes.set_int64_be buf 0 (Int63.to_int64 off); Bytes.set_int32_be buf 8 (Int32.of_int len); Bytes.set buf 12 (Pack_value.Kind.to_magic kind); + (* Bytes.unsafe_to_string usage: buf is local, uniquely owned. We assume the various + functions above return unique ownership of buf. Then in the call to + Bytes.unsafe_to_string we give up unique ownership of buf for ownership of the + resulting string. This is safe. *) Bytes.unsafe_to_string buf let decode s pos : t = + (* Bytes.unsafe_of_string usage: s is shared ownership; buf is shared ownership (we + cannot mutate buf) and the lifetime of buf ends on return from this function; we + assume the Bytes.get... functions require only shared ownership. This usage is + safe. *) let buf = Bytes.unsafe_of_string s in let off = Bytes.get_int64_be buf pos |> Int63.of_int64 in let len = Bytes.get_int32_be buf (pos + 8) |> Int32.to_int in diff --git a/src/irmin-pack/unix/pack_store.ml b/src/irmin-pack/unix/pack_store.ml index 7b19e7d03c..96d9e5a292 100644 --- a/src/irmin-pack/unix/pack_store.ml +++ b/src/irmin-pack/unix/pack_store.ml @@ -161,7 +161,14 @@ struct "Attempted to read an entry at offset %a in the pack file, but got \ only %d bytes" Int63.pp off bytes_read; - let hash = decode_bin_hash (Bytes.unsafe_to_string buf) (ref 0) in + let hash = + (* Bytes.unsafe_to_string usage: buf is created locally, so we have unique + ownership; we assume Dispatcher.read_at_most_exn returns unique ownership; use of + Bytes.unsafe_to_string converts buffer to shared ownership; the rest of the code + seems to require only shared ownership (buffer is read, but not mutated). This is + safe. *) + decode_bin_hash (Bytes.unsafe_to_string buf) (ref 0) + in let kind = Pack_value.Kind.of_magic_exn (Bytes.get buf Hash.hash_size) in let size_of_value_and_length_header = match Val.length_header kind with @@ -172,6 +179,8 @@ struct variable-length length field (if they exist / were read correctly): *) let pos_ref = ref length_header_start in + (* Bytes.unsafe_to_string usage: buf is shared at this point; we assume + Varint.decode_bin requires only shared ownership. This usage is safe. *) let length_header = Varint.decode_bin (Bytes.unsafe_to_string buf) pos_ref in @@ -195,6 +204,10 @@ struct let found = Dispatcher.read_if_not_gced t.dispatcher ~off ~len buf in if (not found) || gced buf then None else + (* Bytes.unsafe_to_string usafe: buf is create in this function, uniquely owned; we + assume Dispatcher.read_if_not_gced returns unique ownership; then call to + Bytes.unsafe_to_string gives up ownerhsip of buf for ownership of resulting + string. This is safe. *) let hash = decode_bin_hash (Bytes.unsafe_to_string buf) (ref 0) in Some hash @@ -295,6 +308,9 @@ struct let key_of_hash = Pack_key.v_indexed in let dict = Dict.find t.dict in let v = + (* Bytes.unsafe_to_string usage: buf created, uniquely owned; after creation, we + assume Dispatcher.read_if_not_gced returns unique ownership; we give up unique + ownership in call to Bytes.unsafe_to_string. This is safe. *) Val.decode_bin ~key_of_offset ~key_of_hash ~dict (Bytes.unsafe_to_string buf) (ref 0) diff --git a/src/irmin-pack/unix/snapshot.ml b/src/irmin-pack/unix/snapshot.ml index 005f419dc4..c9e3e880ea 100644 --- a/src/irmin-pack/unix/snapshot.ml +++ b/src/irmin-pack/unix/snapshot.ml @@ -104,8 +104,11 @@ module Make (Args : Args) = struct io_read_and_decode_entry_prefix ~off:offset t in let entry_of_hash hash = key_of_hash hash t.inode_pack in + (* Bytes.unsafe_to_string usage: buf is created locally, uniquely owned; we assume + Dispatcher.read_exn returns unique ownership; then call to Bytes.unsafe_to_string + gives up unique ownership of buf. This is safe. *) Inode.Raw.decode_children_offsets ~entry_of_offset ~entry_of_hash - (Bytes.unsafe_to_string buf) + (Bytes.unsafe_to_string buf) (* safe: see comment above *) (ref 0) type visit = { visited : Hash.t -> bool; set_visit : Hash.t -> unit } @@ -226,9 +229,16 @@ module Make (Args : Args) = struct let buf = Bytes.create encoded_size in Bytes.set_int64_be buf 0 (Int63.to_int64 off); Bytes.set_int32_be buf 8 (Int32.of_int len); + (* Bytes.unsafe_to_string usage: buf is local, uniquely owned; we assume the + Bytes.set... functions return unique ownership; then Bytes.unsafe_to_string + gives up unique ownership of buf to get shared ownership of the resulting + string, which is then returned. buf is no longer accessible. This is safe. *) Bytes.unsafe_to_string buf let decode s pos : t = + (* Bytes.unsafe_of_string usage: s is shared; buf is shared (we cannot mutate it); + we assume Bytes.get_... functions need shared ownership only. This usage is + safe. *) let buf = Bytes.unsafe_of_string s in let off = Bytes.get_int64_be buf pos |> Int63.of_int64 in let len = Bytes.get_int32_be buf (pos + 8) |> Int32.to_int in diff --git a/src/irmin-pack/unix/traverse_pack_file.ml b/src/irmin-pack/unix/traverse_pack_file.ml index 93b0e0efe8..760120d37a 100644 --- a/src/irmin-pack/unix/traverse_pack_file.ml +++ b/src/irmin-pack/unix/traverse_pack_file.ml @@ -253,6 +253,9 @@ end = struct else let buffer_off, off, missing_hash = match + (* Bytes.unsafe_to_string usage: possibly safe, depending on details of + implementation of decode_entry_exn TODO either justify clearly that this is + safe, or change to use safe Bytes.to_string *) decode_entry_exn ~off ~buffer:(Bytes.unsafe_to_string !buffer) ~buffer_off